summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Christoph Bumiller <[email protected]> 2012-04-14 06:08:08 +0200
committer: Christoph Bumiller <[email protected]> 2012-04-14 06:14:21 +0200
commit: ce713cd520792707e9097ef9e843ef7ab57b0eab (patch)
tree: 1d2422b070bb5bfc2858616ca991825a02dd83bb /src
parent: edbfeed56f1ebd8517840ef48f8c87e24bb98157 (diff)
nvc0: replace VERTEX_DATA push mode with translate to buffer
While pushing vertices through the FIFO is relatively fast on nv50, it's horribly slow on nvc0.
Diffstat (limited to 'src')
-rw-r--r-- src/gallium/drivers/nvc0/Makefile.sources 2
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_context.h 2
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_stateobj.h 4
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_vbo.c 158
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_vbo_translate.c 639
5 files changed, 748 insertions, 57 deletions
diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources
index 7e431c69e81..394c5b9220e 100644
--- a/src/gallium/drivers/nvc0/Makefile.sources
+++ b/src/gallium/drivers/nvc0/Makefile.sources
@@ -11,9 +11,9 @@ C_SOURCES := \
nvc0_tex.c \
nvc0_transfer.c \
nvc0_vbo.c \
+ nvc0_vbo_translate.c \
nvc0_program.c \
nvc0_shader_state.c \
- nvc0_push.c \
nvc0_query.c
CPP_SOURCES := \
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index 8b80f2fe386..32de91e5644 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -88,6 +88,7 @@ struct nvc0_context {
uint32_t constant_elts;
int32_t index_bias;
uint16_t scissor;
+ uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
uint8_t num_textures[5];
@@ -118,7 +119,6 @@ struct nvc0_context {
unsigned num_vtxbufs;
struct pipe_index_buffer idxbuf;
uint32_t constant_vbos;
- uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
unsigned vbo_min_index; /* from pipe_draw_info, for vertex upload */
unsigned vbo_max_index;
diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h
index bd543029705..fd932be1682 100644
--- a/src/gallium/drivers/nvc0/nvc0_stateobj.h
+++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h
@@ -35,6 +35,7 @@ struct nvc0_zsa_stateobj {
struct nvc0_vertex_element {
struct pipe_vertex_element pipe;
uint32_t state;
+ uint32_t state_alt; /* buffer 0 and with source offset (for translate) */
};
struct nvc0_vertex_stateobj {
@@ -43,8 +44,7 @@ struct nvc0_vertex_stateobj {
uint32_t instance_elts;
uint32_t instance_bufs;
boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
- unsigned vtx_size;
- unsigned vtx_per_packet_max;
+ unsigned size; /* size of vertex in bytes (when packed) */
struct nvc0_vertex_element element[0];
};
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
index 7cb1e0a43b7..a8aa60f4fe5 100644
--- a/src/gallium/drivers/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
@@ -86,31 +86,41 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
so->element[i].state = nvc0_format_table[fmt].vtx;
so->need_conversion = TRUE;
}
- so->element[i].state |= i;
+
+ if (unlikely(ve->instance_divisor)) {
+ so->instance_elts |= 1 << i;
+ so->instance_bufs |= 1 << vbi;
+ }
if (1) {
+ unsigned ca;
unsigned j = transkey.nr_elements++;
+ ca = util_format_description(fmt)->channel[0].size / 8;
+ if (ca != 1 && ca != 2)
+ ca = 4;
+
transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
transkey.element[j].input_format = ve->src_format;
transkey.element[j].input_buffer = vbi;
transkey.element[j].input_offset = ve->src_offset;
transkey.element[j].instance_divisor = ve->instance_divisor;
+ transkey.output_stride = align(transkey.output_stride, ca);
transkey.element[j].output_format = fmt;
transkey.element[j].output_offset = transkey.output_stride;
- transkey.output_stride += (util_format_get_stride(fmt, 1) + 3) & ~3;
+ transkey.output_stride += util_format_get_blocksize(fmt);
- if (unlikely(ve->instance_divisor)) {
- so->instance_elts |= 1 << i;
- so->instance_bufs |= 1 << vbi;
- }
+ so->element[i].state_alt = so->element[i].state;
+ so->element[i].state_alt |= transkey.element[j].output_offset << 7;
}
+
+ so->element[i].state |= i << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT;
}
+ transkey.output_stride = align(transkey.output_stride, 4);
+ so->size = transkey.output_stride;
so->translate = translate_create(&transkey);
- so->vtx_size = transkey.output_stride / 4;
- so->vtx_per_packet_max = NV04_PFIFO_MAX_PACKET_LEN / MAX2(so->vtx_size, 1);
return so;
}
@@ -182,7 +192,10 @@ nvc0_vbuf_range(struct nvc0_context *nvc0, int vbi,
}
}
-static void
+/* Return whether to use alternative vertex submission mode (translate),
+ * and validate vertex buffers and upload user arrays (if normal mode).
+ */
+static uint8_t
nvc0_prevalidate_vbufs(struct nvc0_context *nvc0)
{
const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART;
@@ -192,7 +205,7 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0)
int i;
uint32_t base, size;
- nvc0->vbo_fifo = nvc0->vbo_user = 0;
+ nvc0->vbo_user = 0;
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
@@ -203,10 +216,8 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0)
buf = nv04_resource(vb->buffer);
if (!nouveau_resource_mapped_by_gpu(vb->buffer)) {
- if (nvc0->vbo_push_hint) {
- nvc0->vbo_fifo = ~0;
- return;
- }
+ if (nvc0->vbo_push_hint)
+ return 1;
nvc0->base.vbo_dirty = TRUE;
if (buf->status & NOUVEAU_BUFFER_STATUS_USER_MEMORY) {
@@ -223,6 +234,7 @@ nvc0_prevalidate_vbufs(struct nvc0_context *nvc0)
}
BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD);
}
+ return 0;
}
static void
@@ -283,55 +295,85 @@ nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
struct nvc0_vertex_element *ve;
uint32_t const_vbos;
unsigned i;
+ uint8_t vbo_mode;
boolean update_vertex;
if (unlikely(vertex->need_conversion) ||
unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) {
nvc0->vbo_user = 0;
- nvc0->vbo_fifo = ~nvc0->constant_vbos;
+ vbo_mode = 3;
} else {
- nvc0_prevalidate_vbufs(nvc0);
- nvc0->vbo_fifo &= ~nvc0->constant_vbos;
+ vbo_mode = nvc0_prevalidate_vbufs(nvc0);
}
- const_vbos = nvc0->vbo_fifo ? 0 : nvc0->constant_vbos;
+ const_vbos = vbo_mode ? 0 : nvc0->constant_vbos;
update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) ||
- (const_vbos != nvc0->state.constant_vbos);
+ (const_vbos != nvc0->state.constant_vbos) ||
+ (vbo_mode != nvc0->state.vbo_mode);
+
if (update_vertex) {
- uint32_t *restrict data;
const unsigned n = MAX2(vertex->num_elements, nvc0->state.num_vtxelts);
- if (unlikely(vertex->instance_elts != nvc0->state.instance_elts)) {
- nvc0->state.instance_elts = vertex->instance_elts;
- assert(n); /* if (n == 0), both masks should be 0 */
- PUSH_SPACE(push, 3);
- BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
- PUSH_DATA (push, n);
- PUSH_DATA (push, vertex->instance_elts);
- }
-
- nvc0->state.num_vtxelts = vertex->num_elements;
nvc0->state.constant_vbos = const_vbos;
nvc0->state.constant_elts = 0;
+ nvc0->state.num_vtxelts = vertex->num_elements;
+ nvc0->state.vbo_mode = vbo_mode;
+
+ if (unlikely(vbo_mode)) {
+ if (unlikely(nvc0->state.instance_elts & 3)) {
+ /* translate mode uses only 2 vertex buffers */
+ nvc0->state.instance_elts &= ~3;
+ PUSH_SPACE(push, 3);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(0)), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ }
+
+ PUSH_SPACE(push, n * 2 + 4);
- PUSH_SPACE(push, n * 2 + 1);
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
- data = push->cur;
- push->cur += n;
- for (i = 0; i < vertex->num_elements; ++data, ++i) {
- ve = &vertex->element[i];
- *data = ve->state;
- if (unlikely(const_vbos & (1 << ve->pipe.vertex_buffer_index))) {
- *data |= NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST;
- nvc0->state.constant_elts |= 1 << i;
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
+ for (i = 0; i < vertex->num_elements; ++i)
+ PUSH_DATA(push, vertex->element[i].state_alt);
+ for (; i < n; ++i)
+ PUSH_DATA(push, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 1);
+ PUSH_DATA (push, (1 << 12) | vertex->size);
+ for (i = 1; i < n; ++i)
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
+ } else {
+ uint32_t *restrict data;
+
+ if (unlikely(vertex->instance_elts != nvc0->state.instance_elts)) {
+ nvc0->state.instance_elts = vertex->instance_elts;
+ assert(n); /* if (n == 0), both masks should be 0 */
+ PUSH_SPACE(push, 3);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
+ PUSH_DATA (push, n);
+ PUSH_DATA (push, vertex->instance_elts);
+ }
+
+ PUSH_SPACE(push, n * 2 + 1);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
+ data = push->cur;
+ push->cur += n;
+ for (i = 0; i < vertex->num_elements; ++i) {
+ ve = &vertex->element[i];
+ data[i] = ve->state;
+ if (unlikely(const_vbos & (1 << ve->pipe.vertex_buffer_index))) {
+ nvc0->state.constant_elts |= 1 << i;
+ data[i] |= NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST;
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
+ }
+ }
+ for (; i < n; ++i) {
+ data[i] = NVC0_3D_VERTEX_ATTRIB_INACTIVE;
IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
}
- }
- for (; i < n; ++data, ++i) {
- IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
- *data = NVC0_3D_VERTEX_ATTRIB_INACTIVE;
}
}
+ if (nvc0->state.vbo_mode) /* using translate, don't set up arrays here */
+ return;
PUSH_SPACE(push, vertex->num_elements * 8);
for (i = 0; i < vertex->num_elements; ++i) {
@@ -660,25 +702,35 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
/* For picking only a few vertices from a large user buffer, push is better,
* if index count is larger and we expect repeated vertices, suggest upload.
*/
- nvc0->vbo_push_hint = /* the 64 is heuristic */
- !(info->indexed &&
- ((info->max_index - info->min_index + 64) < info->count));
+ nvc0->vbo_push_hint =
+ info->indexed &&
+ (info->max_index - info->min_index) >= (info->count * 2);
nvc0->vbo_min_index = info->min_index;
nvc0->vbo_max_index = info->max_index;
- if (nvc0->vbo_push_hint != !!nvc0->vbo_fifo)
- nvc0->dirty |= NVC0_NEW_ARRAYS;
-
- if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS)))
- nvc0_update_user_vbufs(nvc0);
+ /* Check whether we want to switch vertex-submission mode,
+ * and if not, update user vbufs.
+ */
+ if (!(nvc0->dirty & NVC0_NEW_ARRAYS)) {
+ if (nvc0->vbo_push_hint) {
+ if (nvc0->vbo_user)
+ nvc0->dirty |= NVC0_NEW_ARRAYS; /* switch to translate mode */
+ } else
+ if (nvc0->state.vbo_mode == 1) {
+ nvc0->dirty |= NVC0_NEW_ARRAYS; /* back to normal mode */
+ }
+ if (nvc0->vbo_user &&
+ !(nvc0->dirty & (NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS)))
+ nvc0_update_user_vbufs(nvc0);
+ }
/* 8 as minimum to avoid immediate double validation of new buffers */
nvc0_state_validate(nvc0, ~0, 8);
push->kick_notify = nvc0_draw_vbo_kick_notify;
- if (nvc0->vbo_fifo) {
+ if (nvc0->state.vbo_mode) {
nvc0_push_vbo(nvc0, info);
push->kick_notify = nvc0_default_kick_notify;
return;
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nvc0/nvc0_vbo_translate.c
new file mode 100644
index 00000000000..26f8cb5fbaf
--- /dev/null
+++ b/src/gallium/drivers/nvc0/nvc0_vbo_translate.c
@@ -0,0 +1,639 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0_context.h"
+#include "nvc0_resource.h"
+
+#include "nvc0_3d.xml.h"
+
+/* Per-draw state shared by the helpers of the translate-to-buffer vertex
+ * submission path in this file.
+ */
+struct push_context {
+   struct nouveau_pushbuf *push;
+
+   struct translate *translate;
+   /* Write cursor into the scratch vertex array. This is a byte pointer so
+    * that advancing it by a byte count is standard C instead of relying on
+    * the GNU void-pointer arithmetic extension; it still converts implicitly
+    * from and to the void * values it is assigned from and passed as.
+    */
+   uint8_t *dest;
+   const void *idxbuf; /* mapped index buffer; NULL for non-indexed draws */
+
+   uint32_t vertex_size; /* size of one translated vertex in bytes */
+   uint32_t restart_index;
+   uint32_t instance_id;
+
+   boolean prim_restart;
+   boolean need_vertex_id;
+
+   struct {
+      boolean enabled;
+      boolean value;       /* current edge flag value, flipped by ef_toggle() */
+      unsigned stride;     /* byte distance between consecutive flag values */
+      const uint8_t *data; /* mapped edge flag attribute data */
+   } edgeflag;
+};
+
+/* Forward declaration: the definition follows nvc0_push_vbo(), which calls it. */
+static void nvc0_push_upload_vertex_ids(struct push_context *,
+ struct nvc0_context *,
+ const struct pipe_draw_info *);
+
+/* Initialise @ctx for a draw from the pushbuf, the bound vertex state and the
+ * bound vertex program of @nvc0. Fields that depend on the draw info
+ * (index buffer, restart state, instance id, destination) are left to the
+ * caller.
+ */
+static void
+nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
+{
+   /* Edge flags start out set; no source data is mapped here, the pointer
+    * and stride are only cleared to keep the compiler quiet.
+    */
+   ctx->edgeflag.data = NULL;
+   ctx->edgeflag.stride = 0;
+   ctx->edgeflag.value = TRUE;
+   ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS;
+
+   /* Vertex ids are only supplied while fewer than 32 elements are in use. */
+   ctx->need_vertex_id =
+      (nvc0->vertex->num_elements < 32) && nvc0->vertprog->vp.need_vertex_id;
+
+   ctx->vertex_size = nvc0->vertex->size;
+   ctx->translate = nvc0->vertex->translate;
+
+   ctx->push = nvc0->base.pushbuf;
+}
+
+/* Map every bound vertex buffer for reading and hand it to the translate
+ * object of the current vertex state.
+ *
+ * A non-zero @index_bias is folded into the base pointer of each buffer,
+ * except for buffers flagged in instance_bufs (per-instance data).
+ */
+static INLINE void
+nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias)
+{
+   struct translate *xlate = nvc0->vertex->translate;
+   unsigned b;
+
+   for (b = 0; b < nvc0->num_vtxbufs; ++b) {
+      const struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
+      const uint8_t *base =
+         nouveau_resource_map_offset(&nvc0->base, nv04_resource(vb->buffer),
+                                     vb->buffer_offset, NOUVEAU_BO_RD);
+
+      if (index_bias) {
+         if (!unlikely(nvc0->vertex->instance_bufs & (1 << b)))
+            base += (intptr_t)index_bias * vb->stride;
+      }
+
+      xlate->set_buffer(xlate, b, base, vb->stride, ~0);
+   }
+}
+
+/* Map the bound index buffer for reading, starting at its bind offset, and
+ * store the resulting pointer in ctx->idxbuf.
+ */
+static INLINE void
+nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0)
+{
+   const unsigned bind_offset = nvc0->idxbuf.offset;
+   struct nv04_resource *res = nv04_resource(nvc0->idxbuf.buffer);
+
+   ctx->idxbuf =
+      nouveau_resource_map_offset(&nvc0->base, res, bind_offset, NOUVEAU_BO_RD);
+}
+
+/* Map the vertex buffer that feeds the vertex program's edge flag attribute
+ * and record its data pointer and stride in ctx->edgeflag.
+ *
+ * The pointer is positioned at the attribute's first value (buffer offset
+ * plus element source offset) and moved by @index_bias vertices if non-zero.
+ */
+static INLINE void
+nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
+                       int32_t index_bias)
+{
+   const unsigned attr = nvc0->vertprog->vp.edgeflag;
+   struct pipe_vertex_element *elt = &nvc0->vertex->element[attr].pipe;
+   struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[elt->vertex_buffer_index];
+   const unsigned first = vb->buffer_offset + elt->src_offset;
+
+   ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base,
+                                                    nv04_resource(vb->buffer),
+                                                    first, NOUVEAU_BO_RD);
+   ctx->edgeflag.stride = vb->stride;
+
+   if (index_bias)
+      ctx->edgeflag.data += (intptr_t)index_bias * vb->stride;
+}
+
+/* Return the position of the first element of elts[0..push) equal to @index,
+ * or @push if there is none.
+ */
+static INLINE unsigned
+prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index)
+{
+   unsigned pos = 0;
+
+   while (pos < push && elts[pos] != index)
+      ++pos;
+   return pos;
+}
+
+/* Return the position of the first element of elts[0..push) equal to @index,
+ * or @push if there is none.
+ */
+static INLINE unsigned
+prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index)
+{
+   unsigned pos = 0;
+
+   while (pos < push && elts[pos] != index)
+      ++pos;
+   return pos;
+}
+
+/* Return the position of the first element of elts[0..push) equal to @index,
+ * or @push if there is none.
+ */
+static INLINE unsigned
+prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
+{
+   unsigned pos = 0;
+
+   while (pos < push && elts[pos] != index)
+      ++pos;
+   return pos;
+}
+
+/* Read the edge flag of vertex @index from the mapped edge flag data, which
+ * is interpreted as one float per vertex; any non-zero value yields TRUE.
+ */
+static INLINE boolean
+ef_value(const struct push_context *ctx, uint32_t index)
+{
+   const uint8_t *src = ctx->edgeflag.data + index * ctx->edgeflag.stride;
+   const float *flag = (const float *)src;
+
+   if (*flag)
+      return TRUE;
+   return FALSE;
+}
+
+/* Invert the edge flag value tracked in @ctx and return the new value. */
+static INLINE boolean
+ef_toggle(struct push_context *ctx)
+{
+   const boolean flipped = !ctx->edgeflag.value;
+
+   ctx->edgeflag.value = flipped;
+   return flipped;
+}
+
+/* Count the leading indices in elts[0..n) whose vertices carry the edge flag
+ * value currently tracked in @ctx; returns @n if no vertex differs.
+ */
+static INLINE unsigned
+ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
+{
+   unsigned run = 0;
+
+   while (run < n && ef_value(ctx, elts[run]) == ctx->edgeflag.value)
+      ++run;
+   return run;
+}
+
+/* Count the leading indices in elts[0..n) whose vertices carry the edge flag
+ * value currently tracked in @ctx; returns @n if no vertex differs.
+ */
+static INLINE unsigned
+ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
+{
+   unsigned run = 0;
+
+   while (run < n && ef_value(ctx, elts[run]) == ctx->edgeflag.value)
+      ++run;
+   return run;
+}
+
+/* Count the leading indices in elts[0..n) whose vertices carry the edge flag
+ * value currently tracked in @ctx; returns @n if no vertex differs.
+ */
+static INLINE unsigned
+ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
+{
+   unsigned run = 0;
+
+   while (run < n && ef_value(ctx, elts[run]) == ctx->edgeflag.value)
+      ++run;
+   return run;
+}
+
+/* Count how many consecutive vertices, beginning with vertex @start, carry
+ * the edge flag value currently tracked in @ctx; at most @n are examined.
+ */
+static INLINE unsigned
+ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
+{
+   unsigned run = 0;
+
+   while (run < n && ef_value(ctx, start + run) == ctx->edgeflag.value)
+      ++run;
+   return run;
+}
+
+/* Allocate scratch space for @count translated vertices and program vertex
+ * array 0 (start and limit address) to cover it.
+ *
+ * Returns the CPU pointer to the scratch space, or NULL if the allocation
+ * failed. On failure nothing is pushed and no buffer is referenced: the
+ * caller already treats NULL as "abort the draw", and the address/bo outputs
+ * of the allocator cannot be relied upon in that case.
+ * NOTE(review): confirm against nouveau_scratch_get() that it leaves them
+ * unwritten when it returns NULL.
+ */
+static INLINE void *
+nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
+{
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nouveau_bo *bo = NULL;
+   uint64_t va = 0;
+   const unsigned size = count * nvc0->vertex->size;
+
+   void *const dest = nouveau_scratch_get(&nvc0->base, size, &va, &bo);
+
+   if (!dest)
+      return NULL;
+
+   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
+   PUSH_DATAh(push, va);
+   PUSH_DATA (push, va);
+   /* the limit is the address of the last valid byte */
+   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+   PUSH_DATAh(push, va + size - 1);
+   PUSH_DATA (push, va + size - 1);
+
+   BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+                bo);
+   nouveau_pushbuf_validate(push);
+
+   return dest;
+}
+
+/* Translate and submit @count vertices addressed through 8-bit indices,
+ * beginning at index buffer position @start.
+ *
+ * Vertices are written to ctx->dest in index buffer order, so the hardware is
+ * fed sequential positions of the scratch array (tracked in `pos`) rather
+ * than the original indices. Runs are split at primitive restart indices and,
+ * when edge flags are enabled, wherever the edge flag value changes.
+ */
+static void
+disp_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_pushbuf *push = ctx->push;
+   struct translate *translate = ctx->translate;
+   const uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start;
+   unsigned pos = 0;
+
+   do {
+      /* nR: number of indices up to the next restart index (or the end) */
+      unsigned nR = count;
+
+      if (unlikely(ctx->prim_restart))
+         nR = prim_restart_search_i08(elts, nR, ctx->restart_index);
+
+      translate->run_elts8(translate, elts, nR, ctx->instance_id, ctx->dest);
+      count -= nR;
+      ctx->dest += nR * ctx->vertex_size;
+
+      while (nR) {
+         /* nE: number of vertices sharing the current edge flag value */
+         unsigned nE = nR;
+
+         if (unlikely(ctx->edgeflag.enabled))
+            nE = ef_toggle_search_i08(ctx, elts, nR);
+
+         PUSH_SPACE(push, 4);
+         if (likely(nE >= 2)) {
+            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+            PUSH_DATA (push, pos);
+            PUSH_DATA (push, nE);
+         } else
+         if (nE) {
+            if (pos <= 0xff) {
+               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
+            } else {
+               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+               PUSH_DATA (push, pos);
+            }
+         }
+         if (unlikely(nE != nR))
+            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+         pos += nE;
+         elts += nE;
+         nR -= nE;
+      }
+      if (count) {
+         /* Stopped at a restart index: emit it, and skip one slot of the
+          * scratch array so later positions stay in step with the index
+          * buffer. The reservation made in the loop above may be used up (or
+          * was never made for an empty run), so reserve the 2 words here.
+          */
+         PUSH_SPACE(push, 2);
+         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+         PUSH_DATA (push, ctx->restart_index);
+         ++elts;
+         ctx->dest += ctx->vertex_size;
+         ++pos;
+         --count;
+      }
+   } while (count);
+}
+
+/* Translate and submit @count vertices addressed through 16-bit indices,
+ * beginning at index buffer position @start.
+ *
+ * Vertices are written to ctx->dest in index buffer order, so the hardware is
+ * fed sequential positions of the scratch array (tracked in `pos`) rather
+ * than the original indices. Runs are split at primitive restart indices and,
+ * when edge flags are enabled, wherever the edge flag value changes.
+ */
+static void
+disp_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_pushbuf *push = ctx->push;
+   struct translate *translate = ctx->translate;
+   const uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start;
+   unsigned pos = 0;
+
+   do {
+      /* nR: number of indices up to the next restart index (or the end) */
+      unsigned nR = count;
+
+      if (unlikely(ctx->prim_restart))
+         nR = prim_restart_search_i16(elts, nR, ctx->restart_index);
+
+      translate->run_elts16(translate, elts, nR, ctx->instance_id, ctx->dest);
+      count -= nR;
+      ctx->dest += nR * ctx->vertex_size;
+
+      while (nR) {
+         /* nE: number of vertices sharing the current edge flag value */
+         unsigned nE = nR;
+
+         if (unlikely(ctx->edgeflag.enabled))
+            nE = ef_toggle_search_i16(ctx, elts, nR);
+
+         PUSH_SPACE(push, 4);
+         if (likely(nE >= 2)) {
+            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+            PUSH_DATA (push, pos);
+            PUSH_DATA (push, nE);
+         } else
+         if (nE) {
+            if (pos <= 0xff) {
+               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
+            } else {
+               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+               PUSH_DATA (push, pos);
+            }
+         }
+         if (unlikely(nE != nR))
+            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+         pos += nE;
+         elts += nE;
+         nR -= nE;
+      }
+      if (count) {
+         /* Stopped at a restart index: emit it, and skip one slot of the
+          * scratch array so later positions stay in step with the index
+          * buffer. The reservation made in the loop above may be used up (or
+          * was never made for an empty run), so reserve the 2 words here.
+          */
+         PUSH_SPACE(push, 2);
+         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+         PUSH_DATA (push, ctx->restart_index);
+         ++elts;
+         ctx->dest += ctx->vertex_size;
+         ++pos;
+         --count;
+      }
+   } while (count);
+}
+
+/* Translate and submit @count vertices addressed through 32-bit indices,
+ * beginning at index buffer position @start.
+ *
+ * Vertices are written to ctx->dest in index buffer order, so the hardware is
+ * fed sequential positions of the scratch array (tracked in `pos`) rather
+ * than the original indices. Runs are split at primitive restart indices and,
+ * when edge flags are enabled, wherever the edge flag value changes.
+ */
+static void
+disp_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_pushbuf *push = ctx->push;
+   struct translate *translate = ctx->translate;
+   const uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start;
+   unsigned pos = 0;
+
+   do {
+      /* nR: number of indices up to the next restart index (or the end) */
+      unsigned nR = count;
+
+      if (unlikely(ctx->prim_restart))
+         nR = prim_restart_search_i32(elts, nR, ctx->restart_index);
+
+      translate->run_elts(translate, elts, nR, ctx->instance_id, ctx->dest);
+      count -= nR;
+      ctx->dest += nR * ctx->vertex_size;
+
+      while (nR) {
+         /* nE: number of vertices sharing the current edge flag value */
+         unsigned nE = nR;
+
+         if (unlikely(ctx->edgeflag.enabled))
+            nE = ef_toggle_search_i32(ctx, elts, nR);
+
+         PUSH_SPACE(push, 4);
+         if (likely(nE >= 2)) {
+            BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+            PUSH_DATA (push, pos);
+            PUSH_DATA (push, nE);
+         } else
+         if (nE) {
+            if (pos <= 0xff) {
+               IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
+            } else {
+               BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+               PUSH_DATA (push, pos);
+            }
+         }
+         if (unlikely(nE != nR))
+            IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+         pos += nE;
+         elts += nE;
+         nR -= nE;
+      }
+      if (count) {
+         /* Stopped at a restart index: emit it, and skip one slot of the
+          * scratch array so later positions stay in step with the index
+          * buffer. The reservation made in the loop above may be used up (or
+          * was never made for an empty run), so reserve the 2 words here.
+          */
+         PUSH_SPACE(push, 2);
+         BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+         PUSH_DATA (push, ctx->restart_index);
+         ++elts;
+         ctx->dest += ctx->vertex_size;
+         ++pos;
+         --count;
+      }
+   } while (count);
+}
+
+/* Translate and submit @count consecutive vertices starting at vertex @start
+ * (non-indexed draw).
+ *
+ * All vertices are translated in one go; they are then submitted as ranges
+ * of the scratch array, split wherever the edge flag value changes when edge
+ * flags are enabled.
+ */
+static void
+disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+   struct nouveau_pushbuf *push = ctx->push;
+   struct translate *translate = ctx->translate;
+   unsigned first = 0;    /* position of the next range in the scratch array */
+   unsigned left = count; /* vertices not yet submitted */
+
+   translate->run(translate, start, count, ctx->instance_id, ctx->dest);
+
+   do {
+      unsigned run = left;
+
+      if (unlikely(ctx->edgeflag.enabled))
+         run = ef_toggle_search_seq(ctx, start + first, left);
+
+      PUSH_SPACE(push, 4);
+      if (likely(run)) {
+         BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+         PUSH_DATA (push, first);
+         PUSH_DATA (push, run);
+      }
+      /* the range ended early, so the next vertex has the other flag value */
+      if (unlikely(run != left))
+         IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+      first += run;
+      left -= run;
+   } while (left);
+}
+
+
+/* Expands to one switch case of nvc0_prim_gl() mapping PIPE_PRIM_<n> to the
+ * hardware primitive of the same name.
+ */
+#define NVC0_PRIM_GL_CASE(n) \
+   case PIPE_PRIM_##n: hw_prim = NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n; break
+
+/* Convert a gallium primitive type into the value for VERTEX_BEGIN_GL.
+ * Types without a mapping here (including PATCHES, not handled yet) are
+ * submitted as POINTS.
+ */
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+   unsigned hw_prim = NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+
+   switch (prim) {
+   NVC0_PRIM_GL_CASE(POINTS);
+   NVC0_PRIM_GL_CASE(LINES);
+   NVC0_PRIM_GL_CASE(LINE_LOOP);
+   NVC0_PRIM_GL_CASE(LINE_STRIP);
+   NVC0_PRIM_GL_CASE(TRIANGLES);
+   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+   NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+   NVC0_PRIM_GL_CASE(QUADS);
+   NVC0_PRIM_GL_CASE(QUAD_STRIP);
+   NVC0_PRIM_GL_CASE(POLYGON);
+   NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+   NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+   NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+   NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+   /*
+   NVC0_PRIM_GL_CASE(PATCHES); */
+   default:
+      break;
+   }
+   return hw_prim;
+}
+
+/* Draw @info through the translate path: for each instance, the vertices are
+ * converted by the translate object into a scratch buffer bound as vertex
+ * array 0 and then submitted by position.
+ *
+ * Handles indexed (8/16/32 bit) and non-indexed draws, primitive restart,
+ * edge flags, vertex id replacement and a vertex count taken from a stream
+ * output target. If scratch space cannot be obtained the remaining instances
+ * are dropped.
+ * NOTE(review): the instance loop assumes info->instance_count >= 1.
+ */
+void
+nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+   struct push_context ctx;
+   unsigned i, index_size;
+   unsigned inst_count = info->instance_count;
+   unsigned vert_count = info->count;
+   unsigned prim;
+
+   nvc0_push_context_init(nvc0, &ctx);
+
+   nvc0_vertex_configure_translate(nvc0, info->index_bias);
+
+   /* The context is initialised with edgeflag.data == NULL; the edge flag
+    * source must be mapped before any ef_value() lookup dereferences it.
+    */
+   if (unlikely(ctx.edgeflag.enabled))
+      nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias);
+
+   ctx.prim_restart = info->primitive_restart;
+   ctx.restart_index = info->restart_index;
+
+   if (info->indexed) {
+      nvc0_push_map_idxbuf(&ctx, nvc0);
+      index_size = nvc0->idxbuf.index_size;
+
+      if (info->primitive_restart) {
+         BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
+         PUSH_DATA (ctx.push, 1);
+         PUSH_DATA (ctx.push, info->restart_index);
+      } else
+      if (nvc0->state.prim_restart) {
+         IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
+      }
+      nvc0->state.prim_restart = info->primitive_restart;
+   } else {
+      if (unlikely(info->count_from_stream_output)) {
+         /* the query result is divided by the target's stride to obtain
+          * the number of vertices
+          */
+         struct pipe_context *pipe = &nvc0->base.pipe;
+         struct nvc0_so_target *targ;
+         targ = nvc0_so_target(info->count_from_stream_output);
+         pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+         vert_count /= targ->stride;
+      }
+      ctx.idxbuf = NULL; /* shut up warnings */
+      index_size = 0;
+   }
+
+   ctx.instance_id = info->start_instance;
+
+   prim = nvc0_prim_gl(info->mode);
+   do {
+      PUSH_SPACE(ctx.push, 9);
+
+      ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count);
+      if (unlikely(!ctx.dest))
+         break;
+
+      if (unlikely(ctx.need_vertex_id)) {
+         nvc0_push_upload_vertex_ids(&ctx, nvc0, info);
+         /* The upload reserves pushbuf space only for its own methods, so
+          * the 3 words emitted below are no longer covered by the
+          * reservation made at the top of the loop.
+          */
+         PUSH_SPACE(ctx.push, 3);
+      }
+
+      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
+      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+      PUSH_DATA (ctx.push, prim);
+      switch (index_size) {
+      case 1:
+         disp_vertices_i08(&ctx, info->start, vert_count);
+         break;
+      case 2:
+         disp_vertices_i16(&ctx, info->start, vert_count);
+         break;
+      case 4:
+         disp_vertices_i32(&ctx, info->start, vert_count);
+         break;
+      default:
+         assert(index_size == 0);
+         disp_vertices_seq(&ctx, info->start, vert_count);
+         break;
+      }
+      PUSH_SPACE(ctx.push, 1);
+      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0);
+
+      if (--inst_count) {
+         /* subsequent passes advance the instance */
+         prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+         ++ctx.instance_id;
+      }
+      nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
+      nouveau_scratch_done(&nvc0->base);
+   } while (inst_count);
+
+
+   /* reset state and unmap buffers (no-op) */
+
+   if (unlikely(!ctx.edgeflag.value)) {
+      /* the draw ended with the edge flag cleared; set it again */
+      PUSH_SPACE(ctx.push, 1);
+      IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1);
+   }
+
+   if (unlikely(ctx.need_vertex_id)) {
+      PUSH_SPACE(ctx.push, 4);
+      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0);
+      BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1);
+      PUSH_DATA (ctx.push,
+                 NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST |
+                 NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
+                 NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
+      IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0);
+   }
+
+   if (info->indexed)
+      nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer));
+   for (i = 0; i < nvc0->num_vtxbufs; ++i)
+      nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer));
+}
+
+/* Widen @n 8-bit indices from @elts to 32 bits, adding @bias to each, and
+ * store them in @dst.
+ */
+static INLINE void
+copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
+{
+   const uint8_t *const end = elts + n;
+
+   while (elts != end)
+      *dst++ = *elts++ + bias;
+}
+
+/* Widen @n 16-bit indices from @elts to 32 bits, adding @bias to each, and
+ * store them in @dst.
+ */
+static INLINE void
+copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
+{
+   const uint16_t *const end = elts + n;
+
+   while (elts != end)
+      *dst++ = *elts++ + bias;
+}
+
+/* Copy @n 32-bit indices from @elts to @dst, adding @bias to each. */
+static INLINE void
+copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n)
+{
+   const uint32_t *const end = elts + n;
+
+   while (elts != end)
+      *dst++ = *elts++ + bias;
+}
+
+/* Upload the vertex id of every vertex of the draw into scratch memory, bind
+ * that memory as vertex array 1 and make the hardware take the vertex id
+ * from the attribute slot following the last regular vertex element.
+ *
+ * The translate path submits vertices by their position in the scratch
+ * vertex array, so the ids the application expects (index buffer values plus
+ * bias, or start + n for non-indexed draws) are supplied through this extra
+ * array.
+ *
+ * If no scratch space can be obtained the function returns without changing
+ * any state.
+ * NOTE(review): the id count is info->count, while the caller may have
+ * replaced its vertex count via count_from_stream_output - confirm that the
+ * two cannot be combined with vertex id usage.
+ */
+static void
+nvc0_push_upload_vertex_ids(struct push_context *ctx,
+                            struct nvc0_context *nvc0,
+                            const struct pipe_draw_info *info)
+
+{
+   struct nouveau_pushbuf *push = ctx->push;
+   struct nouveau_bo *bo = NULL;
+   uint64_t va = 0;
+   uint32_t *data;
+   uint32_t format;
+   unsigned index_size = nvc0->idxbuf.index_size;
+   unsigned i;
+   unsigned a = nvc0->vertex->num_elements;
+
+   /* Ids are stored as 32-bit values unless indices can be copied verbatim.
+    * A non-indexed draw must not pick up the size of an index buffer that
+    * merely happens to be bound: that branch below writes 32-bit ids.
+    */
+   if (!info->indexed || !index_size || info->index_bias)
+      index_size = 4;
+   data = nouveau_scratch_get(&nvc0->base,
+                              info->count * index_size, &va, &bo);
+   if (unlikely(!data))
+      return;
+
+   BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+                bo);
+   nouveau_pushbuf_validate(push);
+
+   if (info->indexed) {
+      /* The indices of this draw begin at position info->start of the index
+       * buffer, exactly as in the disp_vertices_* functions.
+       */
+      if (!info->index_bias) {
+         memcpy(data,
+                (const uint8_t *)ctx->idxbuf + info->start * index_size,
+                info->count * index_size);
+      } else {
+         switch (nvc0->idxbuf.index_size) {
+         case 1:
+            copy_indices_u8(data, (const uint8_t *)ctx->idxbuf + info->start,
+                            info->index_bias, info->count);
+            break;
+         case 2:
+            copy_indices_u16(data, (const uint16_t *)ctx->idxbuf + info->start,
+                             info->index_bias, info->count);
+            break;
+         default:
+            copy_indices_u32(data, (const uint32_t *)ctx->idxbuf + info->start,
+                             info->index_bias, info->count);
+            break;
+         }
+      }
+   } else {
+      for (i = 0; i < info->count; ++i)
+         data[i] = i + (info->start + info->index_bias);
+   }
+
+   /* unsigned integer attribute fetched from vertex buffer 1 */
+   format = (1 << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) |
+      NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT;
+
+   switch (index_size) {
+   case 1:
+      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8;
+      break;
+   case 2:
+      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16;
+      break;
+   default:
+      format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32;
+      break;
+   }
+
+   PUSH_SPACE(push, 12);
+
+   /* vertex array 1 must not be stepped per instance while it holds ids */
+   if (unlikely(nvc0->state.instance_elts & 2)) {
+      nvc0->state.instance_elts &= ~2;
+      IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(1)), 0);
+   }
+
+   BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1);
+   PUSH_DATA (push, format);
+
+   /* the array stride equals the size of one id */
+   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 3);
+   PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
+   PUSH_DATAh(push, va);
+   PUSH_DATA (push, va);
+   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
+   PUSH_DATAh(push, va + info->count * index_size - 1);
+   PUSH_DATA (push, va + info->count * index_size - 1);
+
+#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) \
+   (((0x80 + (a) * 0x10) / 4) << NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT)
+
+   BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_REPLACE), 1);
+   PUSH_DATA (push, NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) | 1);
+}