summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorChristoph Bumiller <[email protected]>2012-05-03 12:50:08 +0200
committerChristoph Bumiller <[email protected]>2012-05-04 18:00:07 +0200
commit02fac2930581b9bea9f6d221eb6d6b471fc3b9c6 (patch)
tree1dd1e4074616f0af628c00469d7aa60aacdb8c76 /src
parent8a44ecdae8bde4767a6eea9b641d4fbe378a9269 (diff)
nv50: implement stream output
Diffstat (limited to 'src')
-rw-r--r--src/gallium/drivers/nv50/nv50_context.h23
-rw-r--r--src/gallium/drivers/nv50/nv50_program.c57
-rw-r--r--src/gallium/drivers/nv50/nv50_program.h11
-rw-r--r--src/gallium/drivers/nv50/nv50_push.c24
-rw-r--r--src/gallium/drivers/nv50/nv50_query.c79
-rw-r--r--src/gallium/drivers/nv50/nv50_screen.c10
-rw-r--r--src/gallium/drivers/nv50/nv50_shader_state.c110
-rw-r--r--src/gallium/drivers/nv50/nv50_state.c91
-rw-r--r--src/gallium/drivers/nv50/nv50_state_validate.c2
-rw-r--r--src/gallium/drivers/nv50/nv50_stateobj.h13
-rw-r--r--src/gallium/drivers/nv50/nv50_vbo.c80
-rw-r--r--src/gallium/drivers/nv50/nv50_winsys.h1
12 files changed, 468 insertions, 33 deletions
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 1cee0e06c02..44a0ba0f561 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -48,6 +48,7 @@
#define NV50_NEW_CONSTBUF (1 << 18)
#define NV50_NEW_TEXTURES (1 << 19)
#define NV50_NEW_SAMPLERS (1 << 20)
+#define NV50_NEW_STRMOUT (1 << 21)
#define NV50_NEW_CONTEXT (1 << 31)
#define NV50_BIND_FB 0
@@ -56,9 +57,10 @@
#define NV50_BIND_INDEX 3
#define NV50_BIND_TEXTURES 4
#define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i))
-#define NV50_BIND_SCREEN 53
-#define NV50_BIND_TLS 54
-#define NV50_BIND_COUNT 55
+#define NV50_BIND_SO 53
+#define NV50_BIND_SCREEN 54
+#define NV50_BIND_TLS 55
+#define NV50_BIND_COUNT 56
#define NV50_BIND_2D 0
#define NV50_BIND_M2MF 0
#define NV50_BIND_FENCE 1
@@ -92,11 +94,13 @@ struct nv50_context {
boolean point_sprite;
boolean rt_serialize;
boolean flushed;
+ boolean rasterizer_discard;
uint8_t tls_required;
uint8_t num_vtxbufs;
uint8_t num_vtxelts;
uint8_t num_textures[3];
uint8_t num_samplers[3];
+ uint8_t prim_size;
uint16_t scissor;
} state;
@@ -126,6 +130,10 @@ struct nv50_context {
struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
unsigned num_samplers[3];
+ uint8_t num_so_targets;
+ uint8_t so_targets_dirty;
+ struct pipe_stream_output_target *so_target[4];
+
struct pipe_framebuffer_state framebuffer;
struct pipe_blend_color blend_colour;
struct pipe_stencil_ref stencil_ref;
@@ -168,6 +176,14 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
/* nv50_query.c */
void nv50_init_query_functions(struct nv50_context *);
+void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
+ struct pipe_query *, unsigned result_offset);
+void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
+void nva0_so_target_save_offset(struct pipe_context *,
+ struct pipe_stream_output_target *,
+ unsigned index, boolean seralize);
+
+#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
/* nv50_shader_state.c */
void nv50_vertprog_validate(struct nv50_context *);
@@ -177,6 +193,7 @@ void nv50_fp_linkage_validate(struct nv50_context *);
void nv50_gp_linkage_validate(struct nv50_context *);
void nv50_constbufs_validate(struct nv50_context *);
void nv50_validate_derived_rs(struct nv50_context *);
+void nv50_stream_output_validate(struct nv50_context *);
/* nv50_state.c */
extern void nv50_init_state_functions(struct nv50_context *);
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 1b2e2934b79..48e8db333f9 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -235,6 +235,59 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info)
}
}
+static struct nv50_stream_output_state *
+nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info,
+ const struct pipe_stream_output_info *pso)
+{
+ struct nv50_stream_output_state *so;
+ unsigned b, i, c;
+ unsigned base[4];
+
+ so = MALLOC_STRUCT(nv50_stream_output_state);
+ if (!so)
+ return NULL;
+ memset(so->map, 0xff, sizeof(so->map));
+
+ for (b = 0; b < 4; ++b)
+ so->num_attribs[b] = 0;
+ for (i = 0; i < pso->num_outputs; ++i) {
+ unsigned end = pso->output[i].dst_offset + pso->output[i].num_components;
+ b = pso->output[i].output_buffer;
+ assert(b < 4);
+ so->num_attribs[b] = MAX2(so->num_attribs[b], end);
+ }
+
+ so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED;
+
+ so->stride[0] = pso->stride[0] * 4;
+ base[0] = 0;
+ for (b = 1; b < 4; ++b) {
+ assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]);
+ so->stride[b] = so->num_attribs[b] * 4;
+ if (so->num_attribs[b])
+ so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT;
+ base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4);
+ }
+ if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) {
+ assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX);
+ so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT;
+ }
+
+ so->map_size = base[3] + so->num_attribs[3];
+
+ for (i = 0; i < pso->num_outputs; ++i) {
+ const unsigned s = pso->output[i].start_component;
+ const unsigned p = pso->output[i].dst_offset;
+ const unsigned r = pso->output[i].register_index;
+ b = pso->output[i].output_buffer;
+
+ for (c = 0; c < pso->output[i].num_components; ++c)
+ so->map[base[b] + p + c] = info->out[r].slot[s + c];
+ }
+
+ return so;
+}
+
boolean
nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
{
@@ -293,6 +346,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset)
prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL;
}
+ if (prog->pipe.stream_output.num_outputs)
+ prog->so = nv50_program_create_strmout_state(info,
+ &prog->pipe.stream_output);
+
out:
FREE(info);
return !ret;
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 92361ad9946..f56268b5439 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -42,6 +42,15 @@ struct nv50_varying {
ubyte si; /* semantic index */
};
+struct nv50_stream_output_state
+{
+ uint32_t ctrl;
+ uint16_t stride[4];
+ uint8_t num_attribs[4];
+ uint8_t map_size;
+ uint8_t map[128];
+};
+
struct nv50_program {
struct pipe_shader_state pipe;
@@ -88,6 +97,8 @@ struct nv50_program {
void *fixups; /* relocation records */
struct nouveau_heap *mem;
+
+ struct nv50_stream_output_state *so;
};
boolean nv50_program_translate(struct nv50_program *, uint16_t chipset);
diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c
index 04e32b7e8b9..3abe189e7b5 100644
--- a/src/gallium/drivers/nv50/nv50_push.c
+++ b/src/gallium/drivers/nv50/nv50_push.c
@@ -210,7 +210,8 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
{
struct push_context ctx;
unsigned i, index_size;
- unsigned inst = info->instance_count;
+ unsigned inst_count = info->instance_count;
+ unsigned vert_count = info->count;
boolean apply_bias = info->indexed && info->index_bias;
ctx.push = nv50->base.pushbuf;
@@ -242,6 +243,17 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
ctx.primitive_restart = info->primitive_restart;
ctx.restart_index = info->restart_index;
} else {
+ if (unlikely(info->count_from_stream_output)) {
+ struct pipe_context *pipe = &nv50->base.pipe;
+ struct nv50_so_target *targ;
+ targ = nv50_so_target(info->count_from_stream_output);
+ if (!targ->pq) {
+ NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
+ return;
+ }
+ pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+ vert_count /= targ->stride;
+ }
ctx.idxbuf = NULL;
index_size = 0;
ctx.primitive_restart = FALSE;
@@ -262,21 +274,21 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info)
}
nv50->state.prim_restart = info->primitive_restart;
- while (inst--) {
+ while (inst_count--) {
BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1);
PUSH_DATA (ctx.push, ctx.prim);
switch (index_size) {
case 0:
- emit_vertices_seq(&ctx, info->start, info->count);
+ emit_vertices_seq(&ctx, info->start, vert_count);
break;
case 1:
- emit_vertices_i08(&ctx, info->start, info->count);
+ emit_vertices_i08(&ctx, info->start, vert_count);
break;
case 2:
- emit_vertices_i16(&ctx, info->start, info->count);
+ emit_vertices_i16(&ctx, info->start, vert_count);
break;
case 4:
- emit_vertices_i32(&ctx, info->start, info->count);
+ emit_vertices_i32(&ctx, info->start, vert_count);
break;
default:
assert(0);
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 5275e74964a..8e62c5f11bc 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -36,7 +36,8 @@
struct nv50_query {
uint32_t *data;
- uint32_t type;
+ uint16_t type;
+ uint16_t index;
uint32_t sequence;
struct nouveau_bo *bo;
uint32_t base;
@@ -170,21 +171,15 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1);
PUSH_DATA (push, 1);
break;
- case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */
- PUSH_SPACE(push, 2);
- BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
- PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nv50_query_get(push, q, 0x10, 0x06805002);
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
- PUSH_SPACE(push, 2);
- BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1);
- PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK);
+ nv50_query_get(push, q, 0x10, 0x05805002);
break;
case PIPE_QUERY_SO_STATISTICS:
- PUSH_SPACE(push, 3);
- BEGIN_NI04(push, NV50_3D(COUNTER_RESET), 2);
- PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK);
- PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES);
+ nv50_query_get(push, q, 0x20, 0x05805002);
+ nv50_query_get(push, q, 0x30, 0x06805002);
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT:
case PIPE_QUERY_TIME_ELAPSED:
@@ -227,6 +222,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
case PIPE_QUERY_GPU_FINISHED:
nv50_query_get(push, q, 0, 0x1000f010);
break;
+ case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+ nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
+ break;
default:
assert(0);
break;
@@ -247,6 +245,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
struct nv50_context *nv50 = nv50_context(pipe);
struct nv50_query *q = nv50_query(pq);
uint64_t *res64 = (uint64_t *)result;
+ uint32_t *res32 = (uint32_t *)result;
boolean *res8 = (boolean *)result;
uint64_t *data64 = (uint64_t *)q->data;
@@ -275,11 +274,11 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
break;
case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
- res64[0] = data64[0];
+ res64[0] = data64[0] - data64[2];
break;
case PIPE_QUERY_SO_STATISTICS:
- res64[0] = data64[0];
- res64[1] = data64[1];
+ res64[0] = data64[0] - data64[4];
+ res64[1] = data64[2] - data64[6];
break;
case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */
res64[0] = 1000000000;
@@ -288,6 +287,9 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
case PIPE_QUERY_TIME_ELAPSED:
res64[0] = data64[1] - data64[3];
break;
+ case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+ res32[0] = q->data[1];
+ break;
default:
return FALSE;
}
@@ -295,6 +297,21 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq,
return TRUE;
}
+void
+nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
+{
+ struct nv50_query *q = nv50_query(pq);
+ unsigned offset = q->offset;
+
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, q->bo->offset + offset);
+ PUSH_DATA (push, q->bo->offset + offset);
+ PUSH_DATA (push, q->sequence);
+ PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+}
+
static void
nv50_render_condition(struct pipe_context *pipe,
struct pipe_query *pq, uint mode)
@@ -325,6 +342,38 @@ nv50_render_condition(struct pipe_context *pipe,
}
void
+nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
+ struct pipe_query *pq, unsigned result_offset)
+{
+ struct nv50_query *q = nv50_query(pq);
+
+ /* XXX: does this exist ? */
+#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+
+ nouveau_pushbuf_space(push, 0, 0, 1);
+ nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
+ NV50_IB_ENTRY_1_NO_PREFETCH);
+}
+
+void
+nva0_so_target_save_offset(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg,
+ unsigned index, boolean serialize)
+{
+ struct nv50_so_target *targ = nv50_so_target(ptarg);
+
+ if (serialize) {
+ struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf;
+ PUSH_SPACE(push, 2);
+ BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ nv50_query(targ->pq)->index = index;
+ nv50_query_end(pipe, targ->pq);
+}
+
+void
nv50_init_query_functions(struct nv50_context *nv50)
{
struct pipe_context *pipe = &nv50->base.pipe;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index d72b6e91d2a..b341ade695e 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -73,6 +73,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
static int
nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
{
+ const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
switch (param) {
case PIPE_CAP_MAX_COMBINED_SAMPLERS:
return 64;
@@ -95,7 +97,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_ANISOTROPIC_FILTER:
case PIPE_CAP_SCALED_RESOLVE:
return 1;
- case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
case PIPE_CAP_SEAMLESS_CUBE_MAP:
return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS;
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
@@ -121,11 +122,12 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_OCCLUSION_QUERY:
return 1;
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
- return 0;
+ return 4;
case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
- return 128;
case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
- return 32;
+ return 64;
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ return (class_3d >= NVA0_3D_CLASS) ? 1 : 0;
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
case PIPE_CAP_INDEP_BLEND_ENABLE:
return 1;
diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c
index aef3f129c81..d070f07bbbc 100644
--- a/src/gallium/drivers/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nv50/nv50_shader_state.c
@@ -207,6 +207,8 @@ nv50_gmtyprog_validate(struct nv50_context *nv50)
PUSH_DATA (push, gp->gp.vert_count);
BEGIN_NV04(push, NV50_3D(GP_START_ID), 1);
PUSH_DATA (push, gp->code_base);
+
+ nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */
}
nv50_program_update_context_state(nv50, gp, 2);
@@ -278,6 +280,12 @@ nv50_validate_derived_rs(struct nv50_context *nv50)
nv50_sprite_coords_validate(nv50);
+ if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) {
+ nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard;
+ BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);
+ PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard);
+ }
+
if (nv50->dirty & NV50_NEW_FRAGPROG)
return;
psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK;
@@ -343,6 +351,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
uint32_t colors = fp->fp.colors;
uint32_t lin[4];
uint8_t map[64];
+ uint8_t so_map[64];
if (!(nv50->dirty & (NV50_NEW_VERTPROG |
NV50_NEW_FRAGPROG |
@@ -411,6 +420,30 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
if (nv50->rast->pipe.clamp_vertex_color)
colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN;
+ if (unlikely(vp->so)) {
+ /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
+ * gets written.
+ *
+ * TODO:
+ * Inverting vp->so->map (output -> offset) would probably speed this up.
+ */
+ memset(so_map, 0, sizeof(so_map));
+ for (i = 0; i < vp->so->map_size; ++i) {
+ if (vp->so->map[i] == 0xff)
+ continue;
+ for (c = 0; c < m; ++c)
+ if (map[c] == vp->so->map[i] && !so_map[c])
+ break;
+ if (c == m) {
+ c = m;
+ map[m++] = vp->so->map[i];
+ }
+ so_map[c] = 0x80 | i;
+ }
+ for (c = m; c & 3; ++c)
+ so_map[c] = 0;
+ }
+
n = (m + 3) / 4;
assert(m <= 64);
@@ -451,6 +484,11 @@ nv50_fp_linkage_validate(struct nv50_context *nv50)
BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1);
PUSH_DATA (push, nv50->gmtyprog ? 1 : 0);
+
+ if (vp->so) {
+ BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n);
+ PUSH_DATAp(push, so_map, n);
+ }
}
static int
@@ -509,3 +547,75 @@ nv50_gp_linkage_validate(struct nv50_context *nv50)
BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n);
PUSH_DATAp(push, map, n);
}
+
+void
+nv50_stream_output_validate(struct nv50_context *nv50)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_stream_output_state *so;
+ uint32_t ctrl;
+ unsigned i;
+ unsigned prims = ~0;
+
+ so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so;
+
+ if (!so || !nv50->num_so_targets) {
+ BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ if (nv50->screen->base.class_3d < NVA0_3D_CLASS) {
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
+ PUSH_DATA (push, 0);
+ }
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
+ PUSH_DATA (push, 1);
+ return;
+ }
+
+ ctrl = so->ctrl;
+ if (nv50->screen->base.class_3d >= NVA0_3D_CLASS)
+ ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET;
+
+ BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1);
+ PUSH_DATA (push, ctrl);
+
+ nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO);
+
+ for (i = 0; i < nv50->num_so_targets; ++i) {
+ struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]);
+ struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
+
+ const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3;
+
+ if (n == 4 && !targ->clean)
+ nv84_query_fifo_wait(push, targ->pq);
+ BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n);
+ PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
+ PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
+ PUSH_DATA (push, so->num_attribs[i]);
+ if (n == 4) {
+ PUSH_DATA(push, targ->pipe.buffer_size);
+
+ BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
+ if (!targ->clean) {
+ assert(targ->pq);
+ nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+ } else {
+ PUSH_DATA(push, 0);
+ targ->clean = FALSE;
+ }
+ } else {
+ const unsigned limit = targ->pipe.buffer_size /
+ (so->stride[i] * nv50->state.prim_size);
+ prims = MIN2(prims, limit);
+ }
+ BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
+ }
+ if (prims != ~0) {
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1);
+ PUSH_DATA (push, prims);
+ }
+ BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1);
+ PUSH_DATA (push, 1);
+}
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 5b783da7ad7..5e32b2717fd 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -680,6 +680,9 @@ nv50_sp_state_create(struct pipe_context *pipe,
prog->type = type;
prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+ if (cso->stream_output.num_outputs)
+ prog->pipe.stream_output = cso->stream_output;
+
return (void *)prog;
}
@@ -909,6 +912,90 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
nv50->dirty |= NV50_NEW_VERTEX;
}
+static struct pipe_stream_output_target *
+nv50_so_target_create(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size)
+{
+ struct nv50_so_target *targ = MALLOC_STRUCT(nv50_so_target);
+ if (!targ)
+ return NULL;
+
+ if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) {
+ targ->pq = pipe->create_query(pipe,
+ NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET);
+ if (!targ->pq) {
+ FREE(targ);
+ return NULL;
+ }
+ } else {
+ targ->pq = NULL;
+ }
+ targ->clean = TRUE;
+
+ targ->pipe.buffer_size = size;
+ targ->pipe.buffer_offset = offset;
+ targ->pipe.context = pipe;
+ targ->pipe.buffer = NULL;
+ pipe_resource_reference(&targ->pipe.buffer, res);
+ pipe_reference_init(&targ->pipe.reference, 1);
+
+ return &targ->pipe;
+}
+
+static void
+nv50_so_target_destroy(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg)
+{
+ struct nv50_so_target *targ = nv50_so_target(ptarg);
+ if (targ->pq)
+ pipe->destroy_query(pipe, targ->pq);
+ FREE(targ);
+}
+
+static void
+nv50_set_stream_output_targets(struct pipe_context *pipe,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ unsigned append_mask)
+{
+ struct nv50_context *nv50 = nv50_context(pipe);
+ unsigned i;
+ boolean serialize = TRUE;
+ const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS;
+
+ assert(num_targets <= 4);
+
+ for (i = 0; i < num_targets; ++i) {
+ const boolean changed = nv50->so_target[i] != targets[i];
+ if (!changed && (append_mask & (1 << i)))
+ continue;
+ nv50->so_targets_dirty |= 1 << i;
+
+ if (can_resume && changed && nv50->so_target[i]) {
+ nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
+ serialize = FALSE;
+ }
+
+ if (targets[i] && !(append_mask & (1 << i)))
+ nv50_so_target(targets[i])->clean = TRUE;
+
+ pipe_so_target_reference(&nv50->so_target[i], targets[i]);
+ }
+ for (; i < nv50->num_so_targets; ++i) {
+ if (can_resume && nv50->so_target[i]) {
+ nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize);
+ serialize = FALSE;
+ }
+ pipe_so_target_reference(&nv50->so_target[i], NULL);
+ nv50->so_targets_dirty |= 1 << i;
+ }
+ nv50->num_so_targets = num_targets;
+
+ if (nv50->so_targets_dirty)
+ nv50->dirty |= NV50_NEW_STRMOUT;
+}
+
void
nv50_init_state_functions(struct nv50_context *nv50)
{
@@ -965,6 +1052,10 @@ nv50_init_state_functions(struct nv50_context *nv50)
pipe->set_vertex_buffers = nv50_set_vertex_buffers;
pipe->set_index_buffer = nv50_set_index_buffer;
+ pipe->create_stream_output_target = nv50_so_target_create;
+ pipe->stream_output_target_destroy = nv50_so_target_destroy;
+ pipe->set_stream_output_targets = nv50_set_stream_output_targets;
+
pipe->redefine_user_buffer = u_default_redefine_user_buffer;
}
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index c19acf6c426..a95e96d3c51 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -360,6 +360,8 @@ static struct state_validate {
{ nv50_constbufs_validate, NV50_NEW_CONSTBUF },
{ nv50_validate_textures, NV50_NEW_TEXTURES },
{ nv50_validate_samplers, NV50_NEW_SAMPLERS },
+ { nv50_stream_output_validate, NV50_NEW_STRMOUT |
+ NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS }
};
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h
index 188406da600..8a9260c937e 100644
--- a/src/gallium/drivers/nv50/nv50_stateobj.h
+++ b/src/gallium/drivers/nv50/nv50_stateobj.h
@@ -51,4 +51,17 @@ struct nv50_vertex_stateobj {
struct nv50_vertex_element element[0];
};
+struct nv50_so_target {
+ struct pipe_stream_output_target pipe;
+ struct pipe_query *pq;
+ unsigned stride;
+ boolean clean;
+};
+
+static INLINE struct nv50_so_target *
+nv50_so_target(struct pipe_stream_output_target *ptarg)
+{
+ return (struct nv50_so_target *)ptarg;
+}
+
#endif
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index bc01e69decf..323677eaf80 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -405,6 +405,25 @@ nv50_prim_gl(unsigned prim)
}
}
+/* For pre-nva0 transform feedback. */
+static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] =
+{
+ [PIPE_PRIM_POINTS] = 1,
+ [PIPE_PRIM_LINES] = 2,
+ [PIPE_PRIM_LINE_LOOP] = 2,
+ [PIPE_PRIM_LINE_STRIP] = 2,
+ [PIPE_PRIM_TRIANGLES] = 3,
+ [PIPE_PRIM_TRIANGLE_STRIP] = 3,
+ [PIPE_PRIM_TRIANGLE_FAN] = 3,
+ [PIPE_PRIM_QUADS] = 3,
+ [PIPE_PRIM_QUAD_STRIP] = 3,
+ [PIPE_PRIM_POLYGON] = 3,
+ [PIPE_PRIM_LINES_ADJACENCY] = 2,
+ [PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2,
+ [PIPE_PRIM_TRIANGLES_ADJACENCY] = 3,
+ [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3
+};
+
static void
nv50_draw_arrays(struct nv50_context *nv50,
unsigned mode, unsigned start, unsigned count,
@@ -624,6 +643,51 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten,
}
static void
+nva0_draw_stream_output(struct nv50_context *nv50,
+ const struct pipe_draw_info *info)
+{
+ struct nouveau_pushbuf *push = nv50->base.pushbuf;
+ struct nv50_so_target *so = nv50_so_target(info->count_from_stream_output);
+ struct nv04_resource *res = nv04_resource(so->pipe.buffer);
+ unsigned num_instances = info->instance_count;
+ unsigned mode = nv50_prim_gl(info->mode);
+
+ if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) {
+ /* A proper implementation without waiting doesn't seem possible,
+ * so don't bother.
+ */
+ NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n");
+ return;
+ }
+
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ PUSH_SPACE(push, 4);
+ BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ assert(num_instances);
+ do {
+ PUSH_SPACE(push, 8);
+ BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, mode);
+ BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
+ nv50_query_pushbuf_submit(push, so->pq, 0x4);
+ BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
+ PUSH_DATA (push, 0);
+
+ mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ } while (--num_instances);
+}
+
+static void
nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan)
{
struct nv50_screen *screen = chan->user_priv;
@@ -655,6 +719,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS)))
nv50_update_user_vbufs(nv50);
+ if (unlikely(nv50->num_so_targets && !nv50->gmtyprog))
+ nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode];
+
nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */
push->kick_notify = nv50_draw_vbo_kick_notify;
@@ -679,11 +746,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nv50->base.vbo_dirty = FALSE;
}
- if (!info->indexed) {
- nv50_draw_arrays(nv50,
- info->mode, info->start, info->count,
- info->instance_count);
- } else {
+ if (info->indexed) {
boolean shorten = info->max_index <= 65535;
assert(nv50->idxbuf.buffer);
@@ -713,6 +776,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
nv50_draw_elements(nv50, shorten,
info->mode, info->start, info->count,
info->instance_count, info->index_bias);
+ } else
+ if (unlikely(info->count_from_stream_output)) {
+ nva0_draw_stream_output(nv50, info);
+ } else {
+ nv50_draw_arrays(nv50,
+ info->mode, info->start, info->count,
+ info->instance_count);
}
push->kick_notify = nv50_default_kick_notify;
diff --git a/src/gallium/drivers/nv50/nv50_winsys.h b/src/gallium/drivers/nv50/nv50_winsys.h
index b36898dabe6..145ee70cb9f 100644
--- a/src/gallium/drivers/nv50/nv50_winsys.h
+++ b/src/gallium/drivers/nv50/nv50_winsys.h
@@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
#define SUBC_3D(m) 3, (m)
#define NV50_3D(n) SUBC_3D(NV50_3D_##n)
+#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n)
#define SUBC_2D(m) 4, (m)
#define NV50_2D(n) SUBC_2D(NV50_2D_##n)