#include "pipe/p_context.h"
#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_format.h"
#include "translate/translate.h"

#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_resource.h"

#include "nvc0/nvc0_3d.xml.h"

/*
 * Per-draw state shared by the helpers of the vertex push path.
 */
struct push_context {
   struct nouveau_pushbuf *push;   /* command buffer, nvc0->base.pushbuf */

   struct translate *translate;    /* vertex translator of the bound CSO */
   void *dest;                     /* write cursor in the scratch vertex array */
   const void *idxbuf;             /* mapped index data, NULL if not indexed */

   uint32_t vertex_size;           /* size in bytes of one translated vertex */
   uint32_t restart_index;
   uint32_t instance_id;

   boolean prim_restart;
   boolean need_vertex_id;

   struct {
      boolean enabled;             /* vertex program reads an edge flag attrib */
      boolean value;               /* edge flag value currently set in hw */
      unsigned stride;             /* stride of the edge flag's vertex buffer */
      const uint8_t *data;         /* first edge flag element (bias applied) */
   } edgeflag;
};

static void nvc0_push_upload_vertex_ids(struct push_context *,
                                        struct nvc0_context *,
                                        const struct pipe_draw_info *);

/*
 * Fill in the members of ctx that only depend on currently bound state.
 * dest, idxbuf, restart_index, instance_id and prim_restart are left for
 * nvc0_push_vbo to set.
 */
static void
nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
{
   ctx->push = nvc0->base.pushbuf;

   ctx->translate = nvc0->vertex->translate;
   ctx->vertex_size = nvc0->vertex->size;

   ctx->need_vertex_id =
      nvc0->vertprog->vp.need_vertex_id && (nvc0->vertex->num_elements < 32);

   ctx->edgeflag.value = TRUE;
   ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS;

   /* silence warnings */
   ctx->edgeflag.data = NULL;
   ctx->edgeflag.stride = 0;
}

/*
 * Hand every bound vertex buffer to the translate object. User buffers are
 * used directly, resources are mapped for reading. index_bias is folded into
 * the base pointer, except for buffers flagged in instance_bufs.
 */
static INLINE void
nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias)
{
   struct translate *translate = nvc0->vertex->translate;
   unsigned i;

   for (i = 0; i < nvc0->num_vtxbufs; ++i) {
      const uint8_t *map;
      const struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];

      if (likely(!vb->buffer))
         map = (const uint8_t *)vb->user_buffer;
      else
         map = nouveau_resource_map_offset(&nvc0->base,
            nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD);

      if (index_bias && !unlikely(nvc0->vertex->instance_bufs & (1 << i)))
         map += (intptr_t)index_bias * vb->stride;

      translate->set_buffer(translate, i, map, vb->stride, ~0);
   }
}

/*
 * Point ctx->idxbuf at the index data: a read mapping of the index buffer
 * resource at idxbuf.offset, or the user index pointer if no resource is set.
 */
static INLINE void
nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0)
{
   if (nvc0->idxbuf.buffer) {
      struct nv04_resource *buf = nv04_resource(nvc0->idxbuf.buffer);
      ctx->idxbuf = nouveau_resource_map_offset(&nvc0->base,
         buf, nvc0->idxbuf.offset, NOUVEAU_BO_RD);
   } else {
      ctx->idxbuf = nvc0->idxbuf.user_buffer;
   }
}

/*
 * Locate the edge flag attribute data and record its stride in ctx.
 *
 * The vertex buffer may be a resource or a user buffer; both cases are
 * handled the same way nvc0_vertex_configure_translate handles them, so a
 * user buffer no longer ends up being mapped through a NULL resource.
 */
static INLINE void
nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
                       int32_t index_bias)
{
   unsigned attr = nvc0->vertprog->vp.edgeflag;
   struct pipe_vertex_element *ve = &nvc0->vertex->element[attr].pipe;
   struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];

   ctx->edgeflag.stride = vb->stride;

   if (vb->buffer) {
      struct nv04_resource *buf = nv04_resource(vb->buffer);
      unsigned offset = vb->buffer_offset + ve->src_offset;

      ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base,
         buf, offset, NOUVEAU_BO_RD);
   } else {
      /* user pointer: buffer_offset is not applied, matching
       * nvc0_vertex_configure_translate */
      ctx->edgeflag.data = (const uint8_t *)vb->user_buffer + ve->src_offset;
   }

   if (index_bias)
      ctx->edgeflag.data += (intptr_t)index_bias * vb->stride;
}

/*
 * Return the position of the first element equal to index among the first
 * push elements of elts, or push if there is none.
 */
static INLINE unsigned
prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index)
{
   unsigned i;
   for (i = 0; i < push && elts[i] != index; ++i);
   return i;
}

/* 16-bit variant of prim_restart_search_i08. */
static INLINE unsigned
prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index)
{
   unsigned i;
   for (i = 0; i < push && elts[i] != index; ++i);
   return i;
}

/* 32-bit variant of prim_restart_search_i08. */
static INLINE unsigned
prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
{
   unsigned i;
   for (i = 0; i < push && elts[i] != index; ++i);
   return i;
}

/*
 * Read the edge flag of vertex index as a float and return whether it is
 * non-zero. The value is fetched with memcpy so that no alignment of the
 * byte pointer is assumed and const is not cast away.
 */
static INLINE boolean
ef_value(const struct push_context *ctx, uint32_t index)
{
   float flag;

   memcpy(&flag, &ctx->edgeflag.data[index * ctx->edgeflag.stride],
          sizeof(flag));
   return flag ? TRUE : FALSE;
}

/* Invert the tracked edge flag value and return the new value. */
static INLINE boolean
ef_toggle(struct push_context *ctx)
{
   ctx->edgeflag.value = !ctx->edgeflag.value;
   return ctx->edgeflag.value;
}

/*
 * Return how many leading elements of elts (at most n) refer to vertices
 * whose edge flag equals the currently tracked value.
 */
static INLINE unsigned
ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
{
   unsigned i;
   for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
   return i;
}

/* 16-bit variant of ef_toggle_search_i08. */
static INLINE unsigned
ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
{
   unsigned i;
   for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
   return i;
}

/* 32-bit variant of ef_toggle_search_i08. */
static INLINE unsigned
ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
{
   unsigned i;
   for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
   return i;
}

/*
 * Non-indexed variant: counts consecutive vertices, beginning at start,
 * whose edge flag equals the currently tracked value (at most n).
 */
static INLINE unsigned
ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
{
   unsigned i;
   for (i = 0; i < n && ef_value(ctx, start++) == ctx->edgeflag.value; ++i);
   return i;
}

/*
 * Get scratch space for count translated vertices and program vertex array 0
 * (start and limit address) to cover it. The scratch bo is referenced in the
 * VTX_TMP bin of bufctx_3d and the pushbuf is validated.
 *
 * Returns the pointer obtained from nouveau_scratch_get; the caller checks
 * it for NULL.
 */
static INLINE void *
nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nouveau_bo *bo;
   uint64_t va;
   const unsigned size = count * nvc0->vertex->size;

   void *const dest = nouveau_scratch_get(&nvc0->base, size, &va, &bo);

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
   PUSH_DATAh(push, va);
   PUSH_DATA (push, va);
   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
   PUSH_DATAh(push, va + size - 1);
   PUSH_DATA (push, va + size - 1);

   BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
                bo);
   nouveau_pushbuf_validate(push);

   return dest;
}

/*
 * Translate and draw count vertices addressed by 8-bit indices, beginning at
 * element start of ctx->idxbuf.
 *
 * Each run of indices up to the next restart index is translated into
 * ctx->dest and drawn as ranges of the scratch vertex array; a range is split
 * wherever the edge flag changes. A restart index is forwarded as a single
 * element and still occupies one (untranslated) slot in the scratch array so
 * that positions stay in step with the index stream.
 */
static void
disp_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   const uint8_t *restrict elts = (const uint8_t *)ctx->idxbuf + start;
   unsigned pos = 0;

   do {
      unsigned nR = count;

      if (unlikely(ctx->prim_restart))
         nR = prim_restart_search_i08(elts, nR, ctx->restart_index);

      translate->run_elts8(translate, elts, nR, 0, ctx->instance_id, ctx->dest);
      count -= nR;
      ctx->dest += nR * ctx->vertex_size;

      while (nR) {
         unsigned nE = nR;

         if (unlikely(ctx->edgeflag.enabled))
            nE = ef_toggle_search_i08(ctx, elts, nR);

         PUSH_SPACE(push, 4);
         if (likely(nE >= 2)) {
            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
            PUSH_DATA (push, pos);
            PUSH_DATA (push, nE);
         } else
         if (nE) {
            if (pos <= 0xff) {
               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
            } else {
               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
               PUSH_DATA (push, pos);
            }
         }
         if (unlikely(nE != nR))
            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

         pos += nE;
         elts += nE;
         nR -= nE;
      }
      if (count) {
         /* Reserve room explicitly: the reservation above may be used up,
          * or may not have happened at all if nR was 0. */
         PUSH_SPACE(push, 2);
         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (push, ctx->restart_index);
         ++elts;
         ctx->dest += ctx->vertex_size;
         ++pos;
         --count;
      }
   } while (count);
}

/* 16-bit index variant of disp_vertices_i08. */
static void
disp_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   const uint16_t *restrict elts = (const uint16_t *)ctx->idxbuf + start;
   unsigned pos = 0;

   do {
      unsigned nR = count;

      if (unlikely(ctx->prim_restart))
         nR = prim_restart_search_i16(elts, nR, ctx->restart_index);

      translate->run_elts16(translate, elts, nR, 0, ctx->instance_id,
                            ctx->dest);
      count -= nR;
      ctx->dest += nR * ctx->vertex_size;

      while (nR) {
         unsigned nE = nR;

         if (unlikely(ctx->edgeflag.enabled))
            nE = ef_toggle_search_i16(ctx, elts, nR);

         PUSH_SPACE(push, 4);
         if (likely(nE >= 2)) {
            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
            PUSH_DATA (push, pos);
            PUSH_DATA (push, nE);
         } else
         if (nE) {
            if (pos <= 0xff) {
               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
            } else {
               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
               PUSH_DATA (push, pos);
            }
         }
         if (unlikely(nE != nR))
            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

         pos += nE;
         elts += nE;
         nR -= nE;
      }
      if (count) {
         /* Reserve room explicitly: the reservation above may be used up,
          * or may not have happened at all if nR was 0. */
         PUSH_SPACE(push, 2);
         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (push, ctx->restart_index);
         ++elts;
         ctx->dest += ctx->vertex_size;
         ++pos;
         --count;
      }
   } while (count);
}

/* 32-bit index variant of disp_vertices_i08. */
static void
disp_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   const uint32_t *restrict elts = (const uint32_t *)ctx->idxbuf + start;
   unsigned pos = 0;

   do {
      unsigned nR = count;

      if (unlikely(ctx->prim_restart))
         nR = prim_restart_search_i32(elts, nR, ctx->restart_index);

      translate->run_elts(translate, elts, nR, 0, ctx->instance_id, ctx->dest);
      count -= nR;
      ctx->dest += nR * ctx->vertex_size;

      while (nR) {
         unsigned nE = nR;

         if (unlikely(ctx->edgeflag.enabled))
            nE = ef_toggle_search_i32(ctx, elts, nR);

         PUSH_SPACE(push, 4);
         if (likely(nE >= 2)) {
            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
            PUSH_DATA (push, pos);
            PUSH_DATA (push, nE);
         } else
         if (nE) {
            if (pos <= 0xff) {
               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
            } else {
               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
               PUSH_DATA (push, pos);
            }
         }
         if (unlikely(nE != nR))
            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

         pos += nE;
         elts += nE;
         nR -= nE;
      }
      if (count) {
         /* Reserve room explicitly: the reservation above may be used up,
          * or may not have happened at all if nR was 0. */
         PUSH_SPACE(push, 2);
         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
         PUSH_DATA (push, ctx->restart_index);
         ++elts;
         ctx->dest += ctx->vertex_size;
         ++pos;
         --count;
      }
   } while (count);
}

/*
 * Translate and draw count consecutive (non-indexed) vertices beginning at
 * vertex start. All vertices are translated in one go; the draw is split
 * into ranges wherever the edge flag changes.
 */
static void
disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct translate *translate = ctx->translate;
   unsigned pos = 0;

   translate->run(translate, start, count, 0, ctx->instance_id, ctx->dest);
   do {
      unsigned nr = count;

      if (unlikely(ctx->edgeflag.enabled))
         nr = ef_toggle_search_seq(ctx, start + pos, nr);

      PUSH_SPACE(push, 4);
      if (likely(nr)) {
         BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
         PUSH_DATA (push, pos);
         PUSH_DATA (push, nr);
      }
      if (unlikely(nr != count))
         IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));

      pos += nr;
      count -= nr;
   } while (count);
}

#define NVC0_PRIM_GL_CASE(n) \
   case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n

/*
 * Map a PIPE_PRIM_* value to the matching VERTEX_BEGIN_GL primitive value.
 * Values without a case fall back to POINTS.
 */
static INLINE unsigned
nvc0_prim_gl(unsigned prim)
{
   switch (prim) {
   NVC0_PRIM_GL_CASE(POINTS);
   NVC0_PRIM_GL_CASE(LINES);
   NVC0_PRIM_GL_CASE(LINE_LOOP);
   NVC0_PRIM_GL_CASE(LINE_STRIP);
   NVC0_PRIM_GL_CASE(TRIANGLES);
   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
   NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
   NVC0_PRIM_GL_CASE(QUADS);
   NVC0_PRIM_GL_CASE(QUAD_STRIP);
   NVC0_PRIM_GL_CASE(POLYGON);
   NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
   NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
   NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
   /*
   NVC0_PRIM_GL_CASE(PATCHES); */
   default:
      return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
   }
}

/*
 * Draw by translating the vertices on the CPU into a scratch vertex array
 * and submitting them from there, once per instance.
 *
 * nvc0: context whose bound vertex/index buffers and programs are used
 * info: draw parameters (mode, start, count, index bias, instancing,
 *       primitive restart, optional stream output count)
 *
 * After the draw, the edge flag and the vertex id replacement state touched
 * by this path are set back, and the draw is counted in the
 * draw_calls_fallback_count driver statistic.
 */
void
nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
{
   struct push_context ctx;
   unsigned i, index_size;
   unsigned inst_count = info->instance_count;
   unsigned vert_count = info->count;
   unsigned prim;

   nvc0_push_context_init(nvc0, &ctx);

   nvc0_vertex_configure_translate(nvc0, info->index_bias);

   if (unlikely(ctx.edgeflag.enabled))
      nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias);

   ctx.prim_restart = info->primitive_restart;
   ctx.restart_index = info->restart_index;

   if (info->indexed) {
      nvc0_push_map_idxbuf(&ctx, nvc0);
      index_size = nvc0->idxbuf.index_size;

      if (info->primitive_restart) {
         BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
         PUSH_DATA (ctx.push, 1);
         PUSH_DATA (ctx.push, info->restart_index);
      } else
      if (nvc0->state.prim_restart) {
         IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
      }
      nvc0->state.prim_restart = info->primitive_restart;
   } else {
      if (unlikely(info->count_from_stream_output)) {
         /* vertex count = value returned by the target's query / stride */
         struct pipe_context *pipe = &nvc0->base.pipe;
         struct nvc0_so_target *targ;
         targ = nvc0_so_target(info->count_from_stream_output);
         pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
         vert_count /= targ->stride;
      }
      ctx.idxbuf = NULL; /* shut up warnings */
      index_size = 0;
   }

   ctx.instance_id = info->start_instance;

   prim = nvc0_prim_gl(info->mode);

   /* A pre-tested loop: an instance count of 0 draws nothing instead of
    * letting the unsigned counter wrap around when it is decremented. */
   while (inst_count) {
      PUSH_SPACE(ctx.push, 9);

      ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count);
      if (unlikely(!ctx.dest))
         break;

      if (unlikely(ctx.need_vertex_id))
         nvc0_push_upload_vertex_ids(&ctx, nvc0, info);

      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1);
      PUSH_DATA (ctx.push, prim);
      switch (index_size) {
      case 1:
         disp_vertices_i08(&ctx, info->start, vert_count);
         break;
      case 2:
         disp_vertices_i16(&ctx, info->start, vert_count);
         break;
      case 4:
         disp_vertices_i32(&ctx, info->start, vert_count);
         break;
      default:
         assert(index_size == 0);
         disp_vertices_seq(&ctx, info->start, vert_count);
         break;
      }
      PUSH_SPACE(ctx.push, 1);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0);

      if (--inst_count) {
         prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
         ++ctx.instance_id;
      }
      nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
      nouveau_scratch_done(&nvc0->base);
   }

   /* reset state and unmap buffers (no-op) */

   if (unlikely(!ctx.edgeflag.value)) {
      PUSH_SPACE(ctx.push, 1);
      IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1);
   }

   if (unlikely(ctx.need_vertex_id)) {
      /* NOTE(review): the upload programs VERTEX_ATTRIB_FORMAT(num_elements)
       * while this resets VERTEX_ATTRIB_FORMAT(1) - confirm this is intended.
       */
      PUSH_SPACE(ctx.push, 4);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0);
      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1);
      PUSH_DATA (ctx.push,
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST |
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
                 NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0);
   }

   if (info->indexed)
      nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer));
   for (i = 0; i < nvc0->num_vtxbufs; ++i)
      nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer));

   NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1);
}

/* Widen n 8-bit indices to 32 bits, adding bias to each. */
static INLINE void
copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
{
   unsigned i;
   for (i = 0; i < n; ++i)
      dst[i] = elts[i] + bias;
}

/* Widen n 16-bit indices to 32 bits, adding bias to each. */
static INLINE void
copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
{
   unsigned i;
   for (i = 0; i < n; ++i)
      dst[i] = elts[i] + bias;
}

/* Copy n 32-bit indices, adding bias to each. */
static INLINE void
copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n)
{
   unsigned i;
   for (i = 0; i < n; ++i)
      dst[i] = elts[i] + bias;
}

/*
 * Upload one vertex id per pushed vertex into scratch memory, bind it as
 * vertex array 1 and enable VERTEX_ID_REPLACE with attribute slot
 * num_elements as the source.
 *
 * Entry k of the uploaded array belongs to the k-th vertex of the draw:
 *  - indexed:     index element (info->start + k), plus index_bias
 *  - non-indexed: info->start + info->index_bias + k
 * Indices keep their original size unless a bias has to be added, in which
 * case they are widened to 32 bits.
 *
 * NOTE(review): the array is sized by info->count, whereas nvc0_push_vbo may
 * draw a count derived from stream output - confirm the two cannot differ
 * when vertex ids are needed.
 */
static void
nvc0_push_upload_vertex_ids(struct push_context *ctx,
                            struct nvc0_context *nvc0,
                            const struct pipe_draw_info *info)
{
   struct nouveau_pushbuf *push = ctx->push;
   struct nouveau_bo *bo;
   uint64_t va;
   uint32_t *data;
   uint32_t format;
   unsigned index_size = nvc0->idxbuf.index_size;
   unsigned i;
   unsigned a = nvc0->vertex->num_elements;

   if (!index_size || info->index_bias)
      index_size = 4;
   data = (uint32_t *)nouveau_scratch_get(&nvc0->base,
                                          info->count * index_size, &va, &bo);

   BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
                bo);
   nouveau_pushbuf_validate(push);

   if (info->indexed) {
      /* The draw reads its indices beginning at element info->start (see
       * disp_vertices_i*), so the ids have to be taken from there as well. */
      const void *src = (const uint8_t *)ctx->idxbuf +
         (size_t)info->start * nvc0->idxbuf.index_size;

      if (!info->index_bias) {
         memcpy(data, src, info->count * index_size);
      } else {
         switch (nvc0->idxbuf.index_size) {
         case 1:
            copy_indices_u8(data, src, info->index_bias, info->count);
            break;
         case 2:
            copy_indices_u16(data, src, info->index_bias, info->count);
            break;
         default:
            copy_indices_u32(data, src, info->index_bias, info->count);
            break;
         }
      }
   } else {
      for (i = 0; i < info->count; ++i)
         data[i] = i + (info->start + info->index_bias);
   }

   format = (1 << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) |
      NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT;

   switch (index_size) {
   case 1:
      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8;
      break;
   case 2:
      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16;
      break;
   default:
      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32;
      break;
   }

   PUSH_SPACE(push, 12);

   /* vertex array 1 must not be stepped per instance for the id upload */
   if (unlikely(nvc0->state.instance_elts & 2)) {
      nvc0->state.instance_elts &= ~2;
      IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(1)), 0);
   }

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1);
   PUSH_DATA (push, format);

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 3);
   PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
   PUSH_DATAh(push, va);
   PUSH_DATA (push, va);
   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
   PUSH_DATAh(push, va + info->count * index_size - 1);
   PUSH_DATA (push, va + info->count * index_size - 1);

#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) \
   (((0x80 + (a) * 0x10) / 4) << NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT)

   BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_REPLACE), 1);
   PUSH_DATA (push, NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) | 1);
}