summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/nouveau/nv_object.xml.h 13
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_3d.xml.h 8
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_context.c 2
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_context.h 25
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_program.c 43
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_program.h 11
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_push.c 21
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_query.c 135
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_screen.c 8
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_shader_state.c 104
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_state.c 118
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_state_validate.c 16
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_stateobj.h 16
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_surface.c 6
-rw-r--r-- src/gallium/drivers/nvc0/nvc0_vbo.c 35
15 files changed, 372 insertions, 189 deletions
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index a5b0d0478c8..47dc6751041 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -185,15 +185,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV01_SUBCHAN_OBJECT 0x00000000
-#define NV84_SUBCHAN_QUERY_ADDRESS_HIGH 0x00000010
+#define NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH 0x00000010
-#define NV84_SUBCHAN_QUERY_ADDRESS_LOW 0x00000014
+#define NV84_SUBCHAN_SEMAPHORE_ADDRESS_LOW 0x00000014
-#define NV84_SUBCHAN_QUERY_COUNTER 0x00000018
+#define NV84_SUBCHAN_SEMAPHORE_SEQUENCE 0x00000018
-#define NV84_SUBCHAN_QUERY_GET 0x0000001c
+#define NV84_SUBCHAN_SEMAPHORE_TRIGGER 0x0000001c
+#define NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL 0x00000001
+#define NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG 0x00000002
+#define NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_GEQUAL 0x00000004
-#define NV84_SUBCHAN_QUERY_INTR 0x00000020
+#define NV84_SUBCHAN_NOTIFY_INTR 0x00000020
#define NV84_SUBCHAN_WRCACHE_FLUSH 0x00000024
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
index a8d91082740..c32fa3ac939 100644
--- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h
@@ -130,11 +130,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0))
-#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0))
+#define NVC0_3D_TFB_BUFFER_OFFSET(i0) (0x00000390 + 0x20*(i0))
-#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0))
-#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010
-#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004
+#define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0))
+#define NVC0_3D_TFB_STREAM__ESIZE 0x00000010
+#define NVC0_3D_TFB_STREAM__LEN 0x00000004
#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0))
#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 1bb90a38725..1b3a06dfa33 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -77,7 +77,7 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0)
}
for (i = 0; i < nvc0->num_tfbbufs; ++i)
- pipe_resource_reference(&nvc0->tfbbuf[i], NULL);
+ pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
}
static void
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index 4435c1b4f3c..af95d1ab6aa 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -49,14 +49,14 @@
#define NVC0_NEW_CONSTBUF (1 << 18)
#define NVC0_NEW_TEXTURES (1 << 19)
#define NVC0_NEW_SAMPLERS (1 << 20)
-#define NVC0_NEW_TFB (1 << 21)
-#define NVC0_NEW_TFB_BUFFERS (1 << 22)
+#define NVC0_NEW_TFB_TARGETS (1 << 21)
#define NVC0_BUFCTX_CONSTANT 0
#define NVC0_BUFCTX_FRAME 1
#define NVC0_BUFCTX_VERTEX 2
#define NVC0_BUFCTX_TEXTURES 3
-#define NVC0_BUFCTX_COUNT 4
+#define NVC0_BUFCTX_TFB 4
+#define NVC0_BUFCTX_COUNT 5
struct nvc0_context {
struct nouveau_context base;
@@ -75,6 +75,7 @@ struct nvc0_context {
boolean prim_restart;
boolean early_z;
uint16_t scissor;
+ boolean rasterizer_discard;
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
uint8_t num_textures[5];
@@ -84,6 +85,7 @@ struct nvc0_context {
uint8_t clip_enable;
uint32_t clip_mode;
uint32_t uniform_buffer_bound[5];
+ struct nvc0_transform_feedback_state *tfb;
} state;
struct nvc0_blend_stateobj *blend;
@@ -125,10 +127,9 @@ struct nvc0_context {
boolean vbo_push_hint;
- struct nvc0_transform_feedback_state *tfb;
- struct pipe_resource *tfbbuf[4];
+ uint8_t tfbbuf_dirty;
+ struct pipe_stream_output_target *tfbbuf[4];
unsigned num_tfbbufs;
- unsigned tfb_offset[4];
struct draw_context *draw;
};
@@ -170,10 +171,14 @@ void nvc0_program_library_upload(struct nvc0_context *);
/* nvc0_query.c */
void nvc0_init_query_functions(struct nvc0_context *);
-void nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
- struct pipe_query *pq, unsigned result_offset);
-
-#define NVC0_QUERY_TFB_BUFFER_OFFSETS (PIPE_QUERY_TYPES + 0)
+void nvc0_query_pushbuf_submit(struct nouveau_channel *,
+ struct pipe_query *, unsigned result_offset);
+void nvc0_query_fifo_wait(struct nouveau_channel *, struct pipe_query *);
+void nvc0_so_target_save_offset(struct pipe_context *,
+ struct pipe_stream_output_target *, unsigned i,
+ boolean *serialize);
+
+#define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
/* nvc0_shader_state.c */
void nvc0_vertprog_validate(struct nvc0_context *);
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index f3185b488e8..605bca5e6ba 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -480,6 +480,40 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
return 0;
}
+/* Build the transform feedback layout of a translated program.
+ *
+ * For each of the 4 output buffers, every enabled component of every stream
+ * output assigned to that buffer contributes one entry to varying_index[]:
+ * the slot recorded for that component in info->out[]. Entries are stored
+ * buffer by buffer, so buffer b starts after the entries of buffers 0..b-1.
+ * stride[b] is varying_count[b] * 4, except that a non-zero pso->stride
+ * replaces the stride of buffer 0.
+ *
+ * Returns a MALLOC'ed state, or NULL if the allocation failed.
+ */
+static struct nvc0_transform_feedback_state *
+nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info,
+                              const struct pipe_stream_output_info *pso)
+{
+   struct nvc0_transform_feedback_state *so;
+   int buf, out, comp;
+   int next = 0; /* next free entry in varying_index[] */
+
+   /* room for one index byte per component of every declared output */
+   so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t));
+   if (!so)
+      return NULL;
+
+   for (buf = 0; buf < 4; ++buf) {
+      so->varying_count[buf] = 0;
+
+      for (out = 0; out < pso->num_outputs; ++out) {
+         if (pso->output[out].output_buffer == buf) {
+            for (comp = 0; comp < 4; ++comp) {
+               if (pso->output[out].register_mask & (1 << comp)) {
+                  so->varying_index[next] =
+                     info->out[pso->output[out].register_index].slot[comp];
+                  ++next;
+                  ++so->varying_count[buf];
+               }
+            }
+         }
+      }
+      so->stride[buf] = so->varying_count[buf] * 4;
+   }
+
+   if (pso->stride)
+      so->stride[0] = pso->stride;
+
+   return so;
+}
+
#ifdef DEBUG
static void
nvc0_program_dump(struct nvc0_program *prog)
@@ -577,6 +611,10 @@ nvc0_program_translate(struct nvc0_program *prog)
if (info->io.globalAccess)
prog->hdr[0] |= 1 << 16;
+ if (prog->pipe.stream_output.num_outputs)
+ prog->tfb = nvc0_program_create_tfb_state(info,
+ &prog->pipe.stream_output);
+
out:
FREE(info);
return !ret;
@@ -675,6 +713,11 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
FREE(prog->immd_data);
if (prog->relocs)
FREE(prog->relocs);
+ if (prog->tfb) {
+ if (nvc0->state.tfb == prog->tfb)
+ nvc0->state.tfb = NULL;
+ FREE(prog->tfb);
+ }
memset(prog->hdr, 0, sizeof(prog->hdr));
diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
index b107850ea92..10eb9f724d5 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nvc0/nvc0_program.h
@@ -6,6 +6,14 @@
#define NVC0_CAP_MAX_PROGRAM_TEMPS 128
+
+/* Transform feedback layout of one program, filled in by
+ * nvc0_program_create_tfb_state(). The arrays of 4 are indexed by output
+ * buffer.
+ */
+struct nvc0_transform_feedback_state {
+ uint32_t stride[4]; /* varying_count[b] * 4; buffer 0 takes pso->stride if set */
+ uint8_t varying_count[4]; /* number of captured components per buffer */
+ uint8_t varying_index[0]; /* output slots, grouped by buffer; storage is allocated past the struct */
+};
+
+
#define NVC0_SHADER_HEADER_SIZE (20 * 4)
struct nvc0_program {
@@ -31,7 +39,6 @@ struct nvc0_program {
uint8_t clip_enable; /* only applies if num_ucps == 0 */
uint8_t edgeflag;
uint8_t num_ucps;
- uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS];
} vp;
struct {
uint8_t early_z;
@@ -44,6 +51,8 @@ struct nvc0_program {
void *relocs;
+ struct nvc0_transform_feedback_state *tfb;
+
struct nouveau_resource *res;
};
diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c
index 2e9f4c10928..238671d721c 100644
--- a/src/gallium/drivers/nvc0/nvc0_push.c
+++ b/src/gallium/drivers/nvc0/nvc0_push.c
@@ -273,7 +273,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
{
struct push_context ctx;
unsigned i, index_size;
- unsigned inst = info->instance_count;
+ unsigned inst_count = info->instance_count;
+ unsigned vert_count = info->count;
boolean apply_bias = info->indexed && info->index_bias;
init_push_context(nvc0, &ctx);
@@ -312,26 +313,34 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
index_size = 0;
ctx.primitive_restart = FALSE;
ctx.restart_index = 0;
+
+ if (info->count_from_stream_output) {
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct nvc0_so_target *targ;
+ targ = nvc0_so_target(info->count_from_stream_output);
+ pipe->get_query_result(pipe, targ->pq, TRUE, &vert_count);
+ vert_count /= targ->stride;
+ }
}
ctx.instance_id = info->start_instance;
ctx.prim = nvc0_prim_gl(info->mode);
- while (inst--) {
+ while (inst_count--) {
BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1);
OUT_RING (ctx.chan, ctx.prim);
switch (index_size) {
case 0:
- emit_vertices_seq(&ctx, info->start, info->count);
+ emit_vertices_seq(&ctx, info->start, vert_count);
break;
case 1:
- emit_vertices_i08(&ctx, info->start, info->count);
+ emit_vertices_i08(&ctx, info->start, vert_count);
break;
case 2:
- emit_vertices_i16(&ctx, info->start, info->count);
+ emit_vertices_i16(&ctx, info->start, vert_count);
break;
case 4:
- emit_vertices_i32(&ctx, info->start, info->count);
+ emit_vertices_i32(&ctx, info->start, vert_count);
break;
default:
assert(0);
diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c
index be363a21e6d..7244d838f7d 100644
--- a/src/gallium/drivers/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nvc0/nvc0_query.c
@@ -27,7 +27,8 @@
struct nvc0_query {
uint32_t *data;
- uint32_t type;
+ uint16_t type;
+ uint16_t index;
uint32_t sequence;
struct nouveau_bo *bo;
uint32_t base;
@@ -103,7 +104,6 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
switch (type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
- case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
q->rotate = 32;
space = NVC0_QUERY_ALLOC_SPACE;
break;
@@ -112,6 +112,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
space = 512;
break;
case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
q->is64bit = TRUE;
space = 64;
break;
@@ -123,7 +124,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
case PIPE_QUERY_PRIMITIVES_EMITTED:
space = 32;
break;
- case NVC0_QUERY_TFB_BUFFER_OFFSETS:
+ case NVC0_QUERY_TFB_BUFFER_OFFSET:
space = 16;
break;
default:
@@ -141,7 +142,9 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type)
/* we advance before query_begin ! */
q->offset -= q->rotate;
q->data -= q->rotate / sizeof(*q->data);
- }
+ } else
+ if (!q->is64bit)
+ q->data[0] = 0; /* initialize sequence */
return (struct pipe_query *)q;
}
@@ -176,8 +179,6 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_query *q = nvc0_query(pq);
- const int index = 0; /* vertex stream */
-
/* For occlusion queries we have to change the storage, because a previous
* query might set the initial render conition to FALSE even *after* we re-
* initialized it to TRUE.
@@ -188,12 +189,12 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
/* XXX: can we do this with the GPU, and sync with respect to a previous
* query ?
*/
+ q->data[0] = q->sequence; /* initialize sequence */
q->data[1] = 1; /* initial render condition = TRUE */
q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
q->data[5] = 0;
}
- if (!q->is64bit)
- q->data[0] = q->sequence++; /* the previously used one */
+ q->sequence++;
switch (q->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -208,14 +209,17 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
}
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
- nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
+ nvc0_query_get(chan, q, 0x10, 0x06805002 | (q->index << 5));
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
- nvc0_query_get(chan, q, 0x10, 0x05805002 | (index << 5));
+ nvc0_query_get(chan, q, 0x10, 0x05805002 | (q->index << 5));
break;
case PIPE_QUERY_SO_STATISTICS:
- nvc0_query_get(chan, q, 0x20, 0x05805002 | (index << 5));
- nvc0_query_get(chan, q, 0x30, 0x06805002 | (index << 5));
+ nvc0_query_get(chan, q, 0x20, 0x05805002 | (q->index << 5));
+ nvc0_query_get(chan, q, 0x30, 0x06805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ nvc0_query_get(chan, q, 0x10, 0x03005002 | (q->index << 5));
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_TIME_ELAPSED:
@@ -247,15 +251,11 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_query *q = nvc0_query(pq);
- const int index = 0; /* for multiple vertex streams */
-
if (!q->active) {
/* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
if (q->rotate)
nvc0_query_rotate(nvc0, q);
- else
- if (!q->is64bit)
- q->data[0] = q->sequence++;
+ q->sequence++;
}
q->ready = FALSE;
q->active = FALSE;
@@ -268,17 +268,20 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 0);
break;
case PIPE_QUERY_PRIMITIVES_GENERATED:
- nvc0_query_get(chan, q, 0, 0x06805002 | (index << 5));
+ nvc0_query_get(chan, q, 0, 0x06805002 | (q->index << 5));
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
- nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5));
+ nvc0_query_get(chan, q, 0, 0x05805002 | (q->index << 5));
break;
case PIPE_QUERY_SO_STATISTICS:
- nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5));
- nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5));
+ nvc0_query_get(chan, q, 0x00, 0x05805002 | (q->index << 5));
+ nvc0_query_get(chan, q, 0x10, 0x06805002 | (q->index << 5));
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- nvc0_query_get(chan, q, 0x00, 0x02005002 | (index << 5));
+ /* TODO: How do we sum over all streams for render condition ? */
+ /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
+ nvc0_query_get(chan, q, 0x00, 0x03005002 | (q->index << 5));
+ nvc0_query_get(chan, q, 0x20, 0x00005002);
break;
case PIPE_QUERY_TIMESTAMP:
case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -300,11 +303,9 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
nvc0_query_get(chan, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
nvc0_query_get(chan, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
break;
- case NVC0_QUERY_TFB_BUFFER_OFFSETS:
- nvc0_query_get(chan, q, 0x00, 0x1d005002); /* TFB, BUFFER_OFFSET */
- nvc0_query_get(chan, q, 0x04, 0x1d005022);
- nvc0_query_get(chan, q, 0x08, 0x1d005042);
- nvc0_query_get(chan, q, 0x0c, 0x1d005062);
+ case NVC0_QUERY_TFB_BUFFER_OFFSET:
+ /* indexed by TFB buffer instead of by vertex stream */
+ nvc0_query_get(chan, q, 0x00, 0x0d005002 | (q->index << 5));
break;
default:
assert(0);
@@ -315,7 +316,14 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+/* Non-blocking test of whether query @q can be read back.
+ * 64-bit queries are probed by mapping the buffer with NOUVEAU_BO_NOWAIT;
+ * all other queries compare the sequence word at data[0] with q->sequence.
+ */
static INLINE boolean
nvc0_query_ready(struct nvc0_query *q)
{
- return q->ready || (!q->is64bit && (q->data[0] == q->sequence));
+ if (q->is64bit) {
+ /* NOTE(review): a successful NOWAIT map is taken to mean the GPU is done
+ * with the buffer - confirm against libdrm_nouveau.
+ */
+ if (nouveau_bo_map(q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_NOWAIT))
+ return FALSE;
+ nouveau_bo_unmap(q->bo);
+ return TRUE;
+ } else {
+ return q->data[0] == q->sequence;
+ }
}
static INLINE boolean
@@ -355,14 +363,12 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
switch (q->type) {
case PIPE_QUERY_GPU_FINISHED:
- res32[0] = 0;
res8[0] = TRUE;
break;
case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
res64[0] = q->data[1] - q->data[5];
break;
case PIPE_QUERY_OCCLUSION_PREDICATE:
- res32[0] = 0;
res8[0] = q->data[1] != q->data[5];
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
@@ -374,15 +380,13 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
res64[1] = data64[2] - data64[6];
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- res32[0] = 0;
- res8[0] = !q->data[1];
+ res8[0] = data64[0] != data64[2];
break;
case PIPE_QUERY_TIMESTAMP:
res64[0] = data64[1];
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
res64[0] = 1000000000;
- res32[2] = 0;
res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE;
break;
case PIPE_QUERY_TIME_ELAPSED:
@@ -392,11 +396,8 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
for (i = 0; i < 10; ++i)
res64[i] = data64[i * 2] - data64[24 + i * 2];
break;
- case NVC0_QUERY_TFB_BUFFER_OFFSETS:
- res32[0] = q->data[0];
- res32[1] = q->data[1];
- res32[2] = q->data[2];
- res32[3] = q->data[3];
+ case NVC0_QUERY_TFB_BUFFER_OFFSET:
+ res32[0] = q->data[1];
break;
default:
return FALSE;
@@ -405,6 +406,23 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
return TRUE;
}
+/* Emit a semaphore ACQUIRE_EQUAL on the buffer of query @pq, comparing the
+ * word at the query's offset against q->sequence, so that the channel waits
+ * for the query to have been written.
+ */
+void
+nvc0_query_fifo_wait(struct nouveau_channel *chan, struct pipe_query *pq)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+ unsigned offset = q->offset;
+
+ /* for SO_OVERFLOW_PREDICATE, nvc0_query_end() issues the extra query that
+ * is meant to be synced on at +0x20
+ */
+ if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;
+
+ MARK_RING (chan, 5, 2);
+ BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
+ OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ OUT_RING (chan, q->sequence);
+ /* NOTE(review): bit 12 has no named constant; the open-coded wait this
+ * helper replaces emitted the literal 0x00001001 here.
+ */
+ OUT_RING (chan, (1 << 12) |
+ NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+}
+
static void
nvc0_render_condition(struct pipe_context *pipe,
struct pipe_query *pq, uint mode)
@@ -427,9 +445,8 @@ nvc0_render_condition(struct pipe_context *pipe,
/* NOTE: comparison of 2 queries only works if both have completed */
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
- /* query writes 1 if there was no overflow */
- cond = negated ? NVC0_3D_COND_MODE_RES_NON_ZERO :
- NVC0_3D_COND_MODE_EQUAL;
+ cond = negated ? NVC0_3D_COND_MODE_EQUAL :
+ NVC0_3D_COND_MODE_NOT_EQUAL;
wait = TRUE;
break;
case PIPE_QUERY_OCCLUSION_COUNTER:
@@ -450,14 +467,8 @@ nvc0_render_condition(struct pipe_context *pipe,
break;
}
- if (wait) {
- MARK_RING (chan, 5, 2);
- BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4);
- OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
- OUT_RING (chan, q->sequence);
- OUT_RING (chan, 0x00001001);
- }
+ if (wait)
+ nvc0_query_fifo_wait(chan, pq);
MARK_RING (chan, 4, 2);
BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3);
@@ -467,13 +478,33 @@ nvc0_render_condition(struct pipe_context *pipe,
}
+/* Submit 4 bytes of the buffer of query @pq, located at
+ * q->offset + result_offset, as pushbuf contents on @chan. Callers in this
+ * patch use it right after a BEGIN_RING in place of an OUT_RING, so a value
+ * written by the GPU becomes the method argument.
+ */
void
-nvc0_query_pushbuf_submit(struct nvc0_context *nvc0,
+nvc0_query_pushbuf_submit(struct nouveau_channel *chan,
struct pipe_query *pq, unsigned result_offset)
{
struct nvc0_query *q = nvc0_query(pq);
- nouveau_pushbuf_submit(nvc0->screen->base.channel,
- q->bo, q->offset + result_offset, 4);
+ /* NOTE(review): presumably keeps the data from being fetched before the
+ * GPU has written it - confirm against the IB entry format.
+ */
+#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
+
+ nouveau_pushbuf_submit(chan, q->bo, q->offset + result_offset, 4 |
+ NVC0_IB_ENTRY_1_NO_PREFETCH);
+}
+
+/* Save the current buffer offset of stream output target @ptarg by ending
+ * its offset query for buffer slot @index.
+ *
+ * @serialize: in/out flag. If set, a SERIALIZE method is emitted first and
+ * the flag is cleared, so a sequence of calls sharing one flag serializes
+ * only once.
+ */
+void
+nvc0_so_target_save_offset(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg,
+ unsigned index, boolean *serialize)
+{
+ struct nvc0_so_target *targ = nvc0_so_target(ptarg);
+
+ if (*serialize) {
+ struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel;
+ *serialize = FALSE;
+ IMMED_RING(chan, RING_3D(SERIALIZE), 0);
+ }
+
+ /* the offset query is indexed by TFB buffer, see nvc0_query_end() */
+ nvc0_query(targ->pq)->index = index;
+
+ nvc0_query_end(pipe, targ->pq);
}
void
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index 43fcc617910..67f9175f18e 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -104,9 +104,15 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 1;
case PIPE_CAP_TIMER_QUERY:
case PIPE_CAP_OCCLUSION_QUERY:
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
return 1;
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
- return 0;
+ return 4;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_ATTRIBS:
+ return 4;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return 128;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_INDEP_BLEND_ENABLE:
case PIPE_CAP_INDEP_BLEND_FUNC:
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
index f4a12fbee11..446bd948a53 100644
--- a/src/gallium/drivers/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
@@ -90,7 +90,9 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
return FALSE;
}
- return nvc0_program_upload_code(nvc0, prog);
+ if (likely(prog->code_size))
+ return nvc0_program_upload_code(nvc0, prog);
+ return TRUE; /* stream output info only */
}
void
@@ -212,14 +214,15 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
struct nouveau_channel *chan = nvc0->screen->base.channel;
struct nvc0_program *gp = nvc0->gmtyprog;
- if (!gp) {
+ if (gp)
+ nvc0_program_validate(nvc0, gp);
+ /* we allow GPs with no code for specifying stream output state only */
+ if (!gp || !gp->code_size) {
BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
OUT_RING (chan, 0x40);
IMMED_RING(chan, RING_3D(LAYER), 0);
return;
}
- if (!nvc0_program_validate(nvc0, gp))
- return;
nvc0_program_update_context_state(nvc0, gp, 3);
BEGIN_RING(chan, RING_3D(GP_SELECT), 1);
@@ -234,57 +237,76 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
nvc0_program_validate_clip(nvc0, gp);
}
-/* It's *is* kind of shader related. We need to inspect the program
- * to get the output locations right.
- */
void
nvc0_tfb_validate(struct nvc0_context *nvc0)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
- struct nvc0_program *vp;
- struct nvc0_transform_feedback_state *tfb = nvc0->tfb;
- int b;
+ struct nvc0_transform_feedback_state *tfb;
+ unsigned b, n, i;
- BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1);
- if (!tfb) {
- OUT_RING(chan, 0);
- return;
+ if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb;
+ else
+ if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb;
+ else
+ tfb = nvc0->vertprog->tfb;
+
+ IMMED_RING(chan, RING_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);
+
+ if (tfb && tfb != nvc0->state.tfb) {
+ uint8_t var[128];
+
+ for (n = 0, b = 0; b < 4; n += tfb->varying_count[b++]) {
+ if (tfb->varying_count[b]) {
+ BEGIN_RING(chan, RING_3D(TFB_STREAM(b)), 3);
+ OUT_RING (chan, 0);
+ OUT_RING (chan, tfb->varying_count[b]);
+ OUT_RING (chan, tfb->stride[b]);
+
+ for (i = 0; i < tfb->varying_count[b]; ++i)
+ var[i] = tfb->varying_index[n + i];
+ for (; i & 3; ++i)
+ var[i] = 0; /* zero rest of method word bits */
+
+ BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4);
+ OUT_RINGp (chan, var, i / 4);
+
+ if (nvc0->tfbbuf[b])
+ nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
+ } else {
+ IMMED_RING(chan, RING_3D(TFB_VARYING_COUNT(b)), 0);
+ }
+ }
}
- OUT_RING(chan, 1);
+ nvc0->state.tfb = tfb;
- vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog;
+ if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS))
+ return;
+ nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TFB);
for (b = 0; b < nvc0->num_tfbbufs; ++b) {
- uint8_t idx, var[128];
- int i, n;
- struct nv04_resource *buf = nv04_resource(nvc0->tfbbuf[b]);
+ struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
+ struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
- BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5);
- OUT_RING (chan, 1);
- OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR);
- OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR);
- OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]);
- OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ? */
+ if (tfb)
+ targ->stride = tfb->stride[b];
- if (!(nvc0->dirty & NVC0_NEW_TFB))
+ if (!(nvc0->tfbbuf_dirty & (1 << b)))
continue;
- BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3);
- OUT_RING (chan, 0);
- OUT_RING (chan, tfb->varying_count[b]);
- OUT_RING (chan, tfb->stride[b]);
-
- n = b ? tfb->varying_count[b - 1] : 0;
- i = 0;
- for (; i < tfb->varying_count[b]; ++i) {
- idx = tfb->varying_index[n + i];
- var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3);
+ if (!targ->clean)
+ nvc0_query_fifo_wait(chan, targ->pq);
+ BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5);
+ OUT_RING (chan, 1);
+ OUT_RESRCh(chan, buf, targ->pipe.buffer_offset, NOUVEAU_BO_WR);
+ OUT_RESRCl(chan, buf, targ->pipe.buffer_offset, NOUVEAU_BO_WR);
+ OUT_RING (chan, targ->pipe.buffer_size);
+ if (!targ->clean) {
+ nvc0_query_pushbuf_submit(chan, targ->pq, 0x4);
+ } else {
+ OUT_RING(chan, 0); /* TFB_BUFFER_OFFSET */
+ targ->clean = FALSE;
}
- for (; i & 3; ++i)
- var[i] = 0;
-
- BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4);
- OUT_RINGp (chan, var, i / 4);
+ nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TFB, buf, NOUVEAU_BO_WR);
}
for (; b < 4; ++b)
IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0);
diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
index 63d53ab70d0..1e334a01d87 100644
--- a/src/gallium/drivers/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_state.c
@@ -520,7 +520,12 @@ nvc0_sp_state_create(struct pipe_context *pipe,
return NULL;
prog->type = type;
- prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ if (cso->tokens)
+ prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ if (cso->stream_output.num_outputs)
+ prog->pipe.stream_output = cso->stream_output;
return (void *)prog;
}
@@ -747,72 +752,75 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
nvc0->dirty |= NVC0_NEW_VERTEX;
}
-static void *
-nvc0_tfb_state_create(struct pipe_context *pipe,
- const struct pipe_stream_output_info *pso)
+/* Create a stream output target covering @size bytes of @res starting at
+ * @offset. The target owns a NVC0_QUERY_TFB_BUFFER_OFFSET query and holds a
+ * reference on @res. Returns NULL if either allocation fails.
+ *
+ * NOTE(review): targ->stride is not set here; it is only assigned later by
+ * nvc0_tfb_validate() - confirm it cannot be read before that.
+ */
+static struct pipe_stream_output_target *
+nvc0_so_target_create(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size)
{
- struct nvc0_transform_feedback_state *so;
- int n = 0;
- int i, c, b;
-
- so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t));
- if (!so)
+ struct nvc0_so_target *targ = MALLOC_STRUCT(nvc0_so_target);
+ if (!targ)
return NULL;
- for (b = 0; b < 4; ++b) {
- for (i = 0; i < pso->num_outputs; ++i) {
- if (pso->output[i].output_buffer != b)
- continue;
- for (c = 0; c < 4; ++c) {
- if (!(pso->output[i].register_mask & (1 << c)))
- continue;
- so->varying_count[b]++;
- so->varying_index[n++] = (pso->output[i].register_index << 2) | c;
- }
- }
- so->stride[b] = so->varying_count[b] * 4;
+ targ->pq = pipe->create_query(pipe, NVC0_QUERY_TFB_BUFFER_OFFSET);
+ if (!targ->pq) {
+ FREE(targ);
+ return NULL;
}
- if (pso->stride)
- so->stride[0] = pso->stride;
+ /* nothing written yet: the first bind programs a buffer offset of 0 */
+ targ->clean = TRUE;
- return so;
-}
+ targ->pipe.buffer_size = size;
+ targ->pipe.buffer_offset = offset;
+ targ->pipe.context = pipe;
+ /* clear the pointer before taking the reference on res */
+ targ->pipe.buffer = NULL;
+ pipe_resource_reference(&targ->pipe.buffer, res);
+ pipe_reference_init(&targ->pipe.reference, 1);
-static void
-nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso)
-{
- FREE(hwcso);
+ return &targ->pipe;
}
+/* Destroy a stream output target made by nvc0_so_target_create(): release
+ * its offset query, drop the reference it holds on the backing buffer, and
+ * free the target itself.
+ */
static void
-nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso)
+nvc0_so_target_destroy(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg)
{
- nvc0_context(pipe)->tfb = hwcso;
- nvc0_context(pipe)->dirty |= NVC0_NEW_TFB;
+ struct nvc0_so_target *targ = nvc0_so_target(ptarg);
+ pipe->destroy_query(pipe, targ->pq);
+ /* pairs with the pipe_resource_reference() taken at creation; without it
+ * the buffer would never be released
+ */
+ pipe_resource_reference(&targ->pipe.buffer, NULL);
+ FREE(targ);
}
+/* Bind @num_targets (at most 4) stream output targets.
+ *
+ * @targets: one entry per buffer slot; an entry may be NULL.
+ * @append_mask: bit i set means writing to targets[i] continues at its saved
+ * offset; if clear, the target is marked clean so that it is
+ * written from offset 0 again.
+ *
+ * A target that leaves its slot has its current offset saved first. Every
+ * slot whose binding changes is flagged in tfbbuf_dirty, and
+ * NVC0_NEW_TFB_TARGETS is raised if any slot is flagged.
+ */
static void
-nvc0_set_transform_feedback_buffers(struct pipe_context *pipe,
- struct pipe_resource **buffers,
- int *offsets,
- int num_buffers)
+nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ unsigned append_mask)
{
struct nvc0_context *nvc0 = nvc0_context(pipe);
- int i;
+ unsigned i;
+ boolean serialize = TRUE;
- assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? */
+ assert(num_targets <= 4);
- for (i = 0; i < num_buffers; ++i) {
- assert(offsets[i] >= 0);
- nvc0->tfb_offset[i] = offsets[i];
- pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]);
- }
- for (; i < nvc0->num_tfbbufs; ++i)
- pipe_resource_reference(&nvc0->tfbbuf[i], NULL);
+ for (i = 0; i < num_targets; ++i) {
+ /* same target, appending: nothing changes for this slot */
+ if (nvc0->tfbbuf[i] == targets[i] && (append_mask & (1 << i)))
+ continue;
+ nvc0->tfbbuf_dirty |= 1 << i;
- nvc0->num_tfbbufs = num_buffers;
+ if (nvc0->tfbbuf[i] && nvc0->tfbbuf[i] != targets[i])
+ nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
+
+ if (targets[i] && !(append_mask & (1 << i)))
+ nvc0_so_target(targets[i])->clean = TRUE;
+
+ pipe_so_target_reference(&nvc0->tfbbuf[i], targets[i]);
+ }
+ /* unbind the slots beyond the new count */
+ for (; i < nvc0->num_tfbbufs; ++i) {
+ nvc0->tfbbuf_dirty |= 1 << i;
+ /* a slot may hold NULL if a NULL target was bound to it earlier;
+ * saving the offset would dereference it
+ */
+ if (nvc0->tfbbuf[i])
+ nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
+ pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
+ }
+ nvc0->num_tfbbufs = num_targets;
- nvc0->dirty |= NVC0_NEW_TFB_BUFFERS;
+ if (nvc0->tfbbuf_dirty)
+ nvc0->dirty |= NVC0_NEW_TFB_TARGETS;
}
void
@@ -871,17 +879,9 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
pipe->set_vertex_buffers = nvc0_set_vertex_buffers;
pipe->set_index_buffer = nvc0_set_index_buffer;
-#if 0
- pipe->create_stream_output_state = nvc0_tfb_state_create;
- pipe->delete_stream_output_state = nvc0_tfb_state_delete;
- pipe->bind_stream_output_state = nvc0_tfb_state_bind;
- pipe->set_stream_output_buffers = nvc0_set_transform_feedback_buffers;
-#else
- (void)nvc0_tfb_state_create;
- (void)nvc0_tfb_state_delete;
- (void)nvc0_tfb_state_bind;
- (void)nvc0_set_transform_feedback_buffers;
-#endif
+ pipe->create_stream_output_target = nvc0_so_target_create;
+ pipe->stream_output_target_destroy = nvc0_so_target_destroy;
+ pipe->set_stream_output_targets = nvc0_set_transform_feedback_targets;
pipe->redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c
index 1ec95b7f8b5..0dc822a2f15 100644
--- a/src/gallium/drivers/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c
@@ -428,6 +428,7 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0)
{
struct nouveau_channel *chan = nvc0->screen->base.channel;
boolean early_z;
+ boolean rasterizer_discard;
early_z = nvc0->fragprog->fp.early_z && !nvc0->zsa->pipe.alpha.enabled;
@@ -435,6 +436,16 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0)
nvc0->state.early_z = early_z;
IMMED_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), early_z);
}
+
+ rasterizer_discard = (!nvc0->fragprog || !nvc0->fragprog->hdr[18]) &&
+ !nvc0->zsa->pipe.depth.enabled && !nvc0->zsa->pipe.stencil[0].enabled;
+ rasterizer_discard = rasterizer_discard ||
+ nvc0->rast->pipe.rasterizer_discard;
+
+ if (rasterizer_discard != nvc0->state.rasterizer_discard) {
+ nvc0->state.rasterizer_discard = rasterizer_discard;
+ IMMED_RING(chan, RING_3D(RASTERIZE_ENABLE), !rasterizer_discard);
+ }
}
static void
@@ -484,13 +495,14 @@ static struct state_validate {
{ nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
{ nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
{ nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
- { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA },
+ { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
+ NVC0_NEW_RASTERIZER },
{ nvc0_validate_clip, NVC0_NEW_CLIP },
{ nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
{ nvc0_validate_textures, NVC0_NEW_TEXTURES },
{ nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
{ nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
- { nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS }
+ { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h
index b508000bdab..5c0d0c1149b 100644
--- a/src/gallium/drivers/nvc0/nvc0_stateobj.h
+++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h
@@ -50,11 +50,17 @@ struct nvc0_vertex_stateobj {
struct nvc0_vertex_element element[0];
};
-/* will have to lookup index -> location qualifier from nvc0_program */
-struct nvc0_transform_feedback_state {
- uint32_t stride[4];
- uint8_t varying_count[4];
- uint8_t varying_index[0];
+/* Driver-side stream output target; wraps the gallium target object. */
+struct nvc0_so_target {
+ struct pipe_stream_output_target pipe; /* base object, must stay first (see nvc0_so_target()) */
+ struct pipe_query *pq; /* NVC0_QUERY_TFB_BUFFER_OFFSET query holding the saved offset */
+ unsigned stride; /* copied from the bound program's tfb->stride[] in nvc0_tfb_validate() */
+ boolean clean; /* TRUE: next bind starts writing at offset 0 instead of the saved one */
};
+/* Downcast a gallium stream output target to the driver's wrapper struct. */
+static INLINE struct nvc0_so_target *
+nvc0_so_target(struct pipe_stream_output_target *ptarg)
+{
+ return (struct nvc0_so_target *)ptarg;
+}
+
#endif
diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c
index f807535f046..a2e1a853995 100644
--- a/src/gallium/drivers/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nvc0/nvc0_surface.c
@@ -744,7 +744,8 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)
IMMED_RING(chan, RING_3D(STENCIL_ENABLE), 0);
IMMED_RING(chan, RING_3D(ALPHA_TEST_ENABLE), 0);
- /* transform feedback ? */
+ /* disable transform feedback */
+ IMMED_RING(chan, RING_3D(TFB_ENABLE), 0);
}
static void
@@ -830,7 +831,8 @@ nvc0_blitctx_post_blit(struct nvc0_context *nvc0, struct nvc0_blitctx *blit)
NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND |
NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS |
NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
- NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG);
+ NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
+ NVC0_NEW_TFB_TARGETS);
}
static void
diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c
index 50e99ac5df3..7cf69910e6a 100644
--- a/src/gallium/drivers/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nvc0/nvc0_vbo.c
@@ -569,6 +569,38 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
}
}
+/* Draw with the amount of vertex data taken from the stream output target
+ * info->count_from_stream_output instead of from info->count: the byte count
+ * is fed to the GPU straight from the target's offset query buffer.
+ * One VERTEX_BEGIN_GL/VERTEX_END_GL pair is emitted per instance.
+ */
+static void
+nvc0_draw_stream_output(struct nvc0_context *nvc0,
+ const struct pipe_draw_info *info)
+{
+ struct nouveau_channel *chan = nvc0->screen->base.channel;
+ struct nvc0_so_target *so = nvc0_so_target(info->count_from_stream_output);
+ struct nv04_resource *res = nv04_resource(so->pipe.buffer);
+ unsigned mode = nvc0_prim_gl(info->mode);
+ unsigned num_instances = info->instance_count;
+
+ /* buffer is flagged as being written by the GPU: serialize, wait for the
+ * offset query and flush the vertex array before drawing from it
+ */
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ IMMED_RING(chan, RING_3D(SERIALIZE), 0);
+ nvc0_query_fifo_wait(chan, so->pq);
+ IMMED_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 0);
+ }
+
+ while (num_instances--) {
+ BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1);
+ OUT_RING (chan, mode);
+ BEGIN_RING(chan, RING_3D(DRAW_TFB_BASE), 1);
+ OUT_RING (chan, 0);
+ BEGIN_RING(chan, RING_3D(DRAW_TFB_STRIDE), 1);
+ OUT_RING (chan, so->stride);
+ BEGIN_RING(chan, RING_3D(DRAW_TFB_BYTES), 1);
+ /* argument comes from the query buffer at +4, the word that
+ * nvc0_query_result() reports for NVC0_QUERY_TFB_BUFFER_OFFSET
+ */
+ nvc0_query_pushbuf_submit(chan, so->pq, 0x4);
+ IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0);
+
+ mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+}
+
void
nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
{
@@ -615,6 +647,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nvc0->base.vbo_dirty = FALSE;
}
+ if (unlikely(info->count_from_stream_output)) {
+ nvc0_draw_stream_output(nvc0, info);
+ } else
if (!info->indexed) {
nvc0_draw_arrays(nvc0,
info->mode, info->start, info->count,