diff options
author | Christoph Bumiller <[email protected]> | 2011-12-09 18:46:09 +0100 |
---|---|---|
committer | Christoph Bumiller <[email protected]> | 2011-12-15 18:51:48 +0100 |
commit | 14bd9d764802b5fedb652c791faafe4d13b65262 (patch) | |
tree | 4dba8e6abb0d7d6914fa7f265b5f81127c86c262 /src/gallium/drivers/nvc0 | |
parent | 14193da589275969be31dbdb3280bb48cd24d0c0 (diff) |
nvc0: implement new stream output interface
Diffstat (limited to 'src/gallium/drivers/nvc0')
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_3d.xml.h | 8 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_context.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_context.h | 25 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_program.c | 43 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_program.h | 11 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_push.c | 21 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_query.c | 135 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_screen.c | 8 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_shader_state.c | 104 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_state.c | 118 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_state_validate.c | 16 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_stateobj.h | 16 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_surface.c | 6 | ||||
-rw-r--r-- | src/gallium/drivers/nvc0/nvc0_vbo.c | 35 |
14 files changed, 364 insertions, 184 deletions
diff --git a/src/gallium/drivers/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nvc0/nvc0_3d.xml.h index a8d91082740..c32fa3ac939 100644 --- a/src/gallium/drivers/nvc0/nvc0_3d.xml.h +++ b/src/gallium/drivers/nvc0/nvc0_3d.xml.h @@ -130,11 +130,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0)) -#define NVC0_3D_TFB_PRIMITIVE_ID(i0) (0x00000390 + 0x20*(i0)) +#define NVC0_3D_TFB_BUFFER_OFFSET(i0) (0x00000390 + 0x20*(i0)) -#define NVC0_3D_TFB_UNK07X0(i0) (0x00000700 + 0x10*(i0)) -#define NVC0_3D_TFB_UNK07X0__ESIZE 0x00000010 -#define NVC0_3D_TFB_UNK07X0__LEN 0x00000004 +#define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0)) +#define NVC0_3D_TFB_STREAM__ESIZE 0x00000010 +#define NVC0_3D_TFB_STREAM__LEN 0x00000004 #define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0)) #define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010 diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c index 1bb90a38725..1b3a06dfa33 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.c +++ b/src/gallium/drivers/nvc0/nvc0_context.c @@ -77,7 +77,7 @@ nvc0_context_unreference_resources(struct nvc0_context *nvc0) } for (i = 0; i < nvc0->num_tfbbufs; ++i) - pipe_resource_reference(&nvc0->tfbbuf[i], NULL); + pipe_so_target_reference(&nvc0->tfbbuf[i], NULL); } static void diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h index 4435c1b4f3c..af95d1ab6aa 100644 --- a/src/gallium/drivers/nvc0/nvc0_context.h +++ b/src/gallium/drivers/nvc0/nvc0_context.h @@ -49,14 +49,14 @@ #define NVC0_NEW_CONSTBUF (1 << 18) #define NVC0_NEW_TEXTURES (1 << 19) #define NVC0_NEW_SAMPLERS (1 << 20) -#define NVC0_NEW_TFB (1 << 21) -#define NVC0_NEW_TFB_BUFFERS (1 << 22) +#define NVC0_NEW_TFB_TARGETS (1 << 21) #define NVC0_BUFCTX_CONSTANT 0 #define NVC0_BUFCTX_FRAME 1 #define NVC0_BUFCTX_VERTEX 2 #define NVC0_BUFCTX_TEXTURES 3 -#define NVC0_BUFCTX_COUNT 4 +#define NVC0_BUFCTX_TFB 4 +#define NVC0_BUFCTX_COUNT 5 struct nvc0_context { struct nouveau_context base; @@ -75,6 +75,7 @@ struct nvc0_context { boolean prim_restart; boolean early_z; uint16_t scissor; + boolean rasterizer_discard; uint8_t num_vtxbufs; uint8_t num_vtxelts; uint8_t num_textures[5]; @@ -84,6 +85,7 @@ struct nvc0_context { uint8_t clip_enable; uint32_t clip_mode; uint32_t uniform_buffer_bound[5]; + struct nvc0_transform_feedback_state *tfb; } state; struct nvc0_blend_stateobj *blend; @@ -125,10 +127,9 @@ struct nvc0_context { boolean vbo_push_hint; - struct nvc0_transform_feedback_state *tfb; - struct pipe_resource *tfbbuf[4]; + uint8_t tfbbuf_dirty; + struct pipe_stream_output_target *tfbbuf[4]; unsigned num_tfbbufs; - unsigned tfb_offset[4]; struct draw_context *draw; }; @@ -170,10 +171,14 @@ void nvc0_program_library_upload(struct nvc0_context *); /* nvc0_query.c */ void nvc0_init_query_functions(struct nvc0_context *); -void nvc0_query_pushbuf_submit(struct nvc0_context *nvc0, - struct pipe_query *pq, unsigned result_offset); - -#define NVC0_QUERY_TFB_BUFFER_OFFSETS (PIPE_QUERY_TYPES + 0) +void nvc0_query_pushbuf_submit(struct nouveau_channel *, + struct pipe_query *, unsigned result_offset); +void nvc0_query_fifo_wait(struct nouveau_channel *, struct pipe_query *); +void nvc0_so_target_save_offset(struct pipe_context *, + struct pipe_stream_output_target *, unsigned i, + boolean *serialize); + +#define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) /* nvc0_shader_state.c */ void nvc0_vertprog_validate(struct nvc0_context *); diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index f3185b488e8..605bca5e6ba 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -480,6 +480,40 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) return 0; } +static struct nvc0_transform_feedback_state * +nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, + const struct pipe_stream_output_info *pso) +{ + struct nvc0_transform_feedback_state *tfb; + int n = 0; + int i, c, b; + + tfb = MALLOC(sizeof(*tfb) + pso->num_outputs * 4 * sizeof(uint8_t)); + if (!tfb) + return NULL; + + for (b = 0; b < 4; ++b) { + tfb->varying_count[b] = 0; + + for (i = 0; i < pso->num_outputs; ++i) { + if (pso->output[i].output_buffer != b) + continue; + for (c = 0; c < 4; ++c) { + if (!(pso->output[i].register_mask & (1 << c))) + continue; + tfb->varying_count[b]++; + tfb->varying_index[n++] = + info->out[pso->output[i].register_index].slot[c]; + } + } + tfb->stride[b] = tfb->varying_count[b] * 4; + } + if (pso->stride) + tfb->stride[0] = pso->stride; + + return tfb; +} + #ifdef DEBUG static void nvc0_program_dump(struct nvc0_program *prog) @@ -577,6 +611,10 @@ nvc0_program_translate(struct nvc0_program *prog) if (info->io.globalAccess) prog->hdr[0] |= 1 << 16; + if (prog->pipe.stream_output.num_outputs) + prog->tfb = nvc0_program_create_tfb_state(info, + &prog->pipe.stream_output); + out: FREE(info); return !ret; @@ -675,6 +713,11 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) FREE(prog->immd_data); if (prog->relocs) FREE(prog->relocs); + if (prog->tfb) { + if (nvc0->state.tfb == prog->tfb) + nvc0->state.tfb = NULL; + FREE(prog->tfb); + } memset(prog->hdr, 0, sizeof(prog->hdr)); diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h index b107850ea92..10eb9f724d5 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.h +++ b/src/gallium/drivers/nvc0/nvc0_program.h @@ -6,6 +6,14 @@ #define NVC0_CAP_MAX_PROGRAM_TEMPS 128 + +struct nvc0_transform_feedback_state { + uint32_t stride[4]; + uint8_t varying_count[4]; + uint8_t varying_index[0]; +}; + + #define NVC0_SHADER_HEADER_SIZE (20 * 4) struct nvc0_program { @@ -31,7 +39,6 @@ struct nvc0_program { uint8_t clip_enable; /* only applies if num_ucps == 0 */ uint8_t edgeflag; uint8_t num_ucps; - uint8_t out_pos[PIPE_MAX_SHADER_OUTPUTS]; } vp; struct { uint8_t early_z; @@ -44,6 +51,8 @@ struct nvc0_program { void *relocs; + struct nvc0_transform_feedback_state *tfb; + struct nouveau_resource *res; }; diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c index 2e9f4c10928..238671d721c 100644 --- a/src/gallium/drivers/nvc0/nvc0_push.c +++ b/src/gallium/drivers/nvc0/nvc0_push.c @@ -273,7 +273,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) { struct push_context ctx; unsigned i, index_size; - unsigned inst = info->instance_count; + unsigned inst_count = info->instance_count; + unsigned vert_count = info->count; boolean apply_bias = info->indexed && info->index_bias; init_push_context(nvc0, &ctx); @@ -312,26 +313,34 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info) index_size = 0; ctx.primitive_restart = FALSE; ctx.restart_index = 0; + + if (info->count_from_stream_output) { + struct pipe_context *pipe = &nvc0->base.pipe; + struct nvc0_so_target *targ; + targ = nvc0_so_target(info->count_from_stream_output); + pipe->get_query_result(pipe, targ->pq, TRUE, &vert_count); + vert_count /= targ->stride; + } } ctx.instance_id = info->start_instance; ctx.prim = nvc0_prim_gl(info->mode); - while (inst--) { + while (inst_count--) { BEGIN_RING(ctx.chan, RING_3D(VERTEX_BEGIN_GL), 1); OUT_RING (ctx.chan, ctx.prim); switch (index_size) { case 0: - emit_vertices_seq(&ctx, info->start, info->count); + emit_vertices_seq(&ctx, info->start, vert_count); break; case 1: - emit_vertices_i08(&ctx, info->start, info->count); + emit_vertices_i08(&ctx, info->start, vert_count); break; case 2: - emit_vertices_i16(&ctx, info->start, info->count); + emit_vertices_i16(&ctx, info->start, vert_count); break; case 4: - emit_vertices_i32(&ctx, info->start, info->count); + emit_vertices_i32(&ctx, info->start, vert_count); break; default: assert(0); diff --git a/src/gallium/drivers/nvc0/nvc0_query.c b/src/gallium/drivers/nvc0/nvc0_query.c index be363a21e6d..7244d838f7d 100644 --- a/src/gallium/drivers/nvc0/nvc0_query.c +++ b/src/gallium/drivers/nvc0/nvc0_query.c @@ -27,7 +27,8 @@ struct nvc0_query { uint32_t *data; - uint32_t type; + uint16_t type; + uint16_t index; uint32_t sequence; struct nouveau_bo *bo; uint32_t base; @@ -103,7 +104,6 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type) switch (type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: - case PIPE_QUERY_SO_OVERFLOW_PREDICATE: q->rotate = 32; space = NVC0_QUERY_ALLOC_SPACE; break; @@ -112,6 +112,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type) space = 512; break; case PIPE_QUERY_SO_STATISTICS: + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: q->is64bit = TRUE; space = 64; break; @@ -123,7 +124,7 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type) case PIPE_QUERY_PRIMITIVES_EMITTED: space = 32; break; - case NVC0_QUERY_TFB_BUFFER_OFFSETS: + case NVC0_QUERY_TFB_BUFFER_OFFSET: space = 16; break; default: @@ -141,7 +142,9 @@ nvc0_query_create(struct pipe_context *pipe, unsigned type) /* we advance before query_begin ! */ q->offset -= q->rotate; q->data -= q->rotate / sizeof(*q->data); - } + } else + if (!q->is64bit) + q->data[0] = 0; /* initialize sequence */ return (struct pipe_query *)q; } @@ -176,8 +179,6 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_channel *chan = nvc0->screen->base.channel; struct nvc0_query *q = nvc0_query(pq); - const int index = 0; /* vertex stream */ - /* For occlusion queries we have to change the storage, because a previous * query might set the initial render conition to FALSE even *after* we re- * initialized it to TRUE. @@ -188,12 +189,12 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) /* XXX: can we do this with the GPU, and sync with respect to a previous * query ? */ + q->data[0] = q->sequence; /* initialize sequence */ q->data[1] = 1; /* initial render condition = TRUE */ q->data[4] = q->sequence + 1; /* for comparison COND_MODE */ q->data[5] = 0; } - if (!q->is64bit) - q->data[0] = q->sequence++; /* the previously used one */ + q->sequence++; switch (q->type) { case PIPE_QUERY_OCCLUSION_COUNTER: @@ -208,14 +209,17 @@ nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq) } break; case PIPE_QUERY_PRIMITIVES_GENERATED: - nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5)); + nvc0_query_get(chan, q, 0x10, 0x06805002 | (q->index << 5)); break; case PIPE_QUERY_PRIMITIVES_EMITTED: - nvc0_query_get(chan, q, 0x10, 0x05805002 | (index << 5)); + nvc0_query_get(chan, q, 0x10, 0x05805002 | (q->index << 5)); break; case PIPE_QUERY_SO_STATISTICS: - nvc0_query_get(chan, q, 0x20, 0x05805002 | (index << 5)); - nvc0_query_get(chan, q, 0x30, 0x06805002 | (index << 5)); + nvc0_query_get(chan, q, 0x20, 0x05805002 | (q->index << 5)); + nvc0_query_get(chan, q, 0x30, 0x06805002 | (q->index << 5)); + break; + case PIPE_QUERY_SO_OVERFLOW_PREDICATE: + nvc0_query_get(chan, q, 0x10, 0x03005002 | (q->index << 5)); break; case PIPE_QUERY_TIMESTAMP_DISJOINT: case PIPE_QUERY_TIME_ELAPSED: @@ -247,15 +251,11 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) struct nouveau_channel *chan = nvc0->screen->base.channel; struct nvc0_query *q = nvc0_query(pq); - const int index = 0; /* for multiple vertex streams */ - if (!q->active) { /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */ if (q->rotate) nvc0_query_rotate(nvc0, q); - else - if (!q->is64bit) - q->data[0] = q->sequence++; + q->sequence++; } q->ready = FALSE; q->active = FALSE; @@ -268,17 +268,20 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) IMMED_RING(chan, RING_3D(SAMPLECNT_ENABLE), 0); break; case PIPE_QUERY_PRIMITIVES_GENERATED: - nvc0_query_get(chan, q, 0, 0x06805002 | (index << 5)); + nvc0_query_get(chan, q, 0, 0x06805002 | (q->index << 5)); break; case PIPE_QUERY_PRIMITIVES_EMITTED: - nvc0_query_get(chan, q, 0, 0x05805002 | (index << 5)); + nvc0_query_get(chan, q, 0, 0x05805002 | (q->index << 5)); break; case PIPE_QUERY_SO_STATISTICS: - nvc0_query_get(chan, q, 0x00, 0x05805002 | (index << 5)); - nvc0_query_get(chan, q, 0x10, 0x06805002 | (index << 5)); + nvc0_query_get(chan, q, 0x00, 0x05805002 | (q->index << 5)); + nvc0_query_get(chan, q, 0x10, 0x06805002 | (q->index << 5)); break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - nvc0_query_get(chan, q, 0x00, 0x02005002 | (index << 5)); + /* TODO: How do we sum over all streams for render condition ? */ + /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */ + nvc0_query_get(chan, q, 0x00, 0x03005002 | (q->index << 5)); + nvc0_query_get(chan, q, 0x20, 0x00005002); break; case PIPE_QUERY_TIMESTAMP: case PIPE_QUERY_TIMESTAMP_DISJOINT: @@ -300,11 +303,9 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) nvc0_query_get(chan, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */ nvc0_query_get(chan, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */ break; - case NVC0_QUERY_TFB_BUFFER_OFFSETS: - nvc0_query_get(chan, q, 0x00, 0x1d005002); /* TFB, BUFFER_OFFSET */ - nvc0_query_get(chan, q, 0x04, 0x1d005022); - nvc0_query_get(chan, q, 0x08, 0x1d005042); - nvc0_query_get(chan, q, 0x0c, 0x1d005062); + case NVC0_QUERY_TFB_BUFFER_OFFSET: + /* indexed by TFB buffer instead of by vertex stream */ + nvc0_query_get(chan, q, 0x00, 0x0d005002 | (q->index << 5)); break; default: assert(0); @@ -315,7 +316,14 @@ nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq) static INLINE boolean nvc0_query_ready(struct nvc0_query *q) { - return q->ready || (!q->is64bit && (q->data[0] == q->sequence)); + if (q->is64bit) { + if (nouveau_bo_map(q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_NOWAIT)) + return FALSE; + nouveau_bo_unmap(q->bo); + return TRUE; + } else { + return q->data[0] == q->sequence; + } } static INLINE boolean @@ -355,14 +363,12 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, switch (q->type) { case PIPE_QUERY_GPU_FINISHED: - res32[0] = 0; res8[0] = TRUE; break; case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ res64[0] = q->data[1] - q->data[5]; break; case PIPE_QUERY_OCCLUSION_PREDICATE: - res32[0] = 0; res8[0] = q->data[1] != q->data[5]; break; case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ @@ -374,15 +380,13 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, res64[1] = data64[2] - data64[6]; break; case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - res32[0] = 0; - res8[0] = !q->data[1]; + res8[0] = data64[0] != data64[2]; break; case PIPE_QUERY_TIMESTAMP: res64[0] = data64[1]; break; case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */ res64[0] = 1000000000; - res32[2] = 0; res8[8] = (data64[1] == data64[3]) ? FALSE : TRUE; break; case PIPE_QUERY_TIME_ELAPSED: @@ -392,11 +396,8 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, for (i = 0; i < 10; ++i) res64[i] = data64[i * 2] - data64[24 + i * 2]; break; - case NVC0_QUERY_TFB_BUFFER_OFFSETS: - res32[0] = q->data[0]; - res32[1] = q->data[1]; - res32[2] = q->data[2]; - res32[3] = q->data[3]; + case NVC0_QUERY_TFB_BUFFER_OFFSET: + res32[0] = q->data[1]; break; default: return FALSE; @@ -405,6 +406,23 @@ nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq, return TRUE; } +void +nvc0_query_fifo_wait(struct nouveau_channel *chan, struct pipe_query *pq) +{ + struct nvc0_query *q = nvc0_query(pq); + unsigned offset = q->offset; + + if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20; + + MARK_RING (chan, 5, 2); + BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); + OUT_RELOCh(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, q->bo, offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RING (chan, q->sequence); + OUT_RING (chan, (1 << 12) | + NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); +} + static void nvc0_render_condition(struct pipe_context *pipe, struct pipe_query *pq, uint mode) @@ -427,9 +445,8 @@ nvc0_render_condition(struct pipe_context *pipe, /* NOTE: comparison of 2 queries only works if both have completed */ switch (q->type) { case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - /* query writes 1 if there was no overflow */ - cond = negated ? NVC0_3D_COND_MODE_RES_NON_ZERO : - NVC0_3D_COND_MODE_EQUAL; + cond = negated ? NVC0_3D_COND_MODE_EQUAL : + NVC0_3D_COND_MODE_NOT_EQUAL; wait = TRUE; break; case PIPE_QUERY_OCCLUSION_COUNTER: @@ -450,14 +467,8 @@ nvc0_render_condition(struct pipe_context *pipe, break; } - if (wait) { - MARK_RING (chan, 5, 2); - BEGIN_RING(chan, RING_3D_(NV84_SUBCHAN_QUERY_ADDRESS_HIGH), 4); - OUT_RELOCh(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RELOCl(chan, q->bo, q->offset, NOUVEAU_BO_GART | NOUVEAU_BO_RD); - OUT_RING (chan, q->sequence); - OUT_RING (chan, 0x00001001); - } + if (wait) + nvc0_query_fifo_wait(chan, pq); MARK_RING (chan, 4, 2); BEGIN_RING(chan, RING_3D(COND_ADDRESS_HIGH), 3); @@ -467,13 +478,33 @@ nvc0_render_condition(struct pipe_context *pipe, } void -nvc0_query_pushbuf_submit(struct nvc0_context *nvc0, +nvc0_query_pushbuf_submit(struct nouveau_channel *chan, struct pipe_query *pq, unsigned result_offset) { struct nvc0_query *q = nvc0_query(pq); - nouveau_pushbuf_submit(nvc0->screen->base.channel, - q->bo, q->offset + result_offset, 4); +#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8)) + + nouveau_pushbuf_submit(chan, q->bo, q->offset + result_offset, 4 | + NVC0_IB_ENTRY_1_NO_PREFETCH); +} + +void +nvc0_so_target_save_offset(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg, + unsigned index, boolean *serialize) +{ + struct nvc0_so_target *targ = nvc0_so_target(ptarg); + + if (*serialize) { + struct nouveau_channel *chan = nvc0_context(pipe)->screen->base.channel; + *serialize = FALSE; + IMMED_RING(chan, RING_3D(SERIALIZE), 0); + } + + nvc0_query(targ->pq)->index = index; + + nvc0_query_end(pipe, targ->pq); } void diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index 43fcc617910..67f9175f18e 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -104,9 +104,15 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_TIMER_QUERY: case PIPE_CAP_OCCLUSION_QUERY: + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: return 1; case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: - return 0; + return 4; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_ATTRIBS: + return 4; + case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: + case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: + return 128; case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_INDEP_BLEND_ENABLE: case PIPE_CAP_INDEP_BLEND_FUNC: diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c index f4a12fbee11..446bd948a53 100644 --- a/src/gallium/drivers/nvc0/nvc0_shader_state.c +++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c @@ -90,7 +90,9 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) return FALSE; } - return nvc0_program_upload_code(nvc0, prog); + if (likely(prog->code_size)) + return nvc0_program_upload_code(nvc0, prog); + return TRUE; /* stream output info only */ } void @@ -212,14 +214,15 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) struct nouveau_channel *chan = nvc0->screen->base.channel; struct nvc0_program *gp = nvc0->gmtyprog; - if (!gp) { + if (gp) + nvc0_program_validate(nvc0, gp); + /* we allow GPs with no code for specifying stream output state only */ + if (!gp || !gp->code_size) { BEGIN_RING(chan, RING_3D(GP_SELECT), 1); OUT_RING (chan, 0x40); IMMED_RING(chan, RING_3D(LAYER), 0); return; } - if (!nvc0_program_validate(nvc0, gp)) - return; nvc0_program_update_context_state(nvc0, gp, 3); BEGIN_RING(chan, RING_3D(GP_SELECT), 1); @@ -234,57 +237,76 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) nvc0_program_validate_clip(nvc0, gp); } -/* It's *is* kind of shader related. We need to inspect the program - * to get the output locations right. - */ void nvc0_tfb_validate(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; - struct nvc0_program *vp; - struct nvc0_transform_feedback_state *tfb = nvc0->tfb; - int b; + struct nvc0_transform_feedback_state *tfb; + unsigned b, n, i; - BEGIN_RING(chan, RING_3D(TFB_ENABLE), 1); - if (!tfb) { - OUT_RING(chan, 0); - return; + if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb; + else + if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb; + else + tfb = nvc0->vertprog->tfb; + + IMMED_RING(chan, RING_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0); + + if (tfb && tfb != nvc0->state.tfb) { + uint8_t var[128]; + + for (n = 0, b = 0; b < 4; n += tfb->varying_count[b++]) { + if (tfb->varying_count[b]) { + BEGIN_RING(chan, RING_3D(TFB_STREAM(b)), 3); + OUT_RING (chan, 0); + OUT_RING (chan, tfb->varying_count[b]); + OUT_RING (chan, tfb->stride[b]); + + for (i = 0; i < tfb->varying_count[b]; ++i) + var[i] = tfb->varying_index[n + i]; + for (; i & 3; ++i) + var[i] = 0; /* zero rest of method word bits */ + + BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4); + OUT_RINGp (chan, var, i / 4); + + if (nvc0->tfbbuf[b]) + nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b]; + } else { + IMMED_RING(chan, RING_3D(TFB_VARYING_COUNT(b)), 0); + } + } } - OUT_RING(chan, 1); + nvc0->state.tfb = tfb; - vp = nvc0->vertprog ? nvc0->vertprog : nvc0->gmtyprog; + if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS)) + return; + nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TFB); for (b = 0; b < nvc0->num_tfbbufs; ++b) { - uint8_t idx, var[128]; - int i, n; - struct nv04_resource *buf = nv04_resource(nvc0->tfbbuf[b]); + struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]); + struct nv04_resource *buf = nv04_resource(targ->pipe.buffer); - BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5); - OUT_RING (chan, 1); - OUT_RESRCh(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); - OUT_RESRCl(chan, buf, nvc0->tfb_offset[b], NOUVEAU_BO_WR); - OUT_RING (chan, buf->base.width0 - nvc0->tfb_offset[b]); - OUT_RING (chan, 0); /* TFB_PRIMITIVE_ID <- offset ? */ + if (tfb) + targ->stride = tfb->stride[b]; - if (!(nvc0->dirty & NVC0_NEW_TFB)) + if (!(nvc0->tfbbuf_dirty & (1 << b))) continue; - BEGIN_RING(chan, RING_3D(TFB_UNK07X0(b)), 3); - OUT_RING (chan, 0); - OUT_RING (chan, tfb->varying_count[b]); - OUT_RING (chan, tfb->stride[b]); - - n = b ? tfb->varying_count[b - 1] : 0; - i = 0; - for (; i < tfb->varying_count[b]; ++i) { - idx = tfb->varying_index[n + i]; - var[i] = vp->vp.out_pos[idx >> 2] + (idx & 3); + if (!targ->clean) + nvc0_query_fifo_wait(chan, targ->pq); + BEGIN_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 5); + OUT_RING (chan, 1); + OUT_RESRCh(chan, buf, targ->pipe.buffer_offset, NOUVEAU_BO_WR); + OUT_RESRCl(chan, buf, targ->pipe.buffer_offset, NOUVEAU_BO_WR); + OUT_RING (chan, targ->pipe.buffer_size); + if (!targ->clean) { + nvc0_query_pushbuf_submit(chan, targ->pq, 0x4); + } else { + OUT_RING(chan, 0); /* TFB_BUFFER_OFFSET */ + targ->clean = FALSE; } - for (; i & 3; ++i) - var[i] = 0; - - BEGIN_RING(chan, RING_3D(TFB_VARYING_LOCS(b, 0)), i / 4); - OUT_RINGp (chan, var, i / 4); + nvc0_bufctx_add_resident(nvc0, NVC0_BUFCTX_TFB, buf, NOUVEAU_BO_WR); } for (; b < 4; ++b) IMMED_RING(chan, RING_3D(TFB_BUFFER_ENABLE(b)), 0); diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c index 63d53ab70d0..1e334a01d87 100644 --- a/src/gallium/drivers/nvc0/nvc0_state.c +++ b/src/gallium/drivers/nvc0/nvc0_state.c @@ -520,7 +520,12 @@ nvc0_sp_state_create(struct pipe_context *pipe, return NULL; prog->type = type; - prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + if (cso->tokens) + prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + if (cso->stream_output.num_outputs) + prog->pipe.stream_output = cso->stream_output; return (void *)prog; } @@ -747,72 +752,75 @@ nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso) nvc0->dirty |= NVC0_NEW_VERTEX; } -static void * -nvc0_tfb_state_create(struct pipe_context *pipe, - const struct pipe_stream_output_info *pso) +static struct pipe_stream_output_target * +nvc0_so_target_create(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size) { - struct nvc0_transform_feedback_state *so; - int n = 0; - int i, c, b; - - so = MALLOC(sizeof(*so) + pso->num_outputs * 4 * sizeof(uint8_t)); - if (!so) + struct nvc0_so_target *targ = MALLOC_STRUCT(nvc0_so_target); + if (!targ) return NULL; - for (b = 0; b < 4; ++b) { - for (i = 0; i < pso->num_outputs; ++i) { - if (pso->output[i].output_buffer != b) - continue; - for (c = 0; c < 4; ++c) { - if (!(pso->output[i].register_mask & (1 << c))) - continue; - so->varying_count[b]++; - so->varying_index[n++] = (pso->output[i].register_index << 2) | c; - } - } - so->stride[b] = so->varying_count[b] * 4; + targ->pq = pipe->create_query(pipe, NVC0_QUERY_TFB_BUFFER_OFFSET); + if (!targ->pq) { + FREE(targ); + return NULL; } - if (pso->stride) - so->stride[0] = pso->stride; + targ->clean = TRUE; - return so; -} + targ->pipe.buffer_size = size; + targ->pipe.buffer_offset = offset; + targ->pipe.context = pipe; + targ->pipe.buffer = NULL; + pipe_resource_reference(&targ->pipe.buffer, res); + pipe_reference_init(&targ->pipe.reference, 1); -static void -nvc0_tfb_state_delete(struct pipe_context *pipe, void *hwcso) -{ - FREE(hwcso); + return &targ->pipe; } static void -nvc0_tfb_state_bind(struct pipe_context *pipe, void *hwcso) +nvc0_so_target_destroy(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg) { - nvc0_context(pipe)->tfb = hwcso; - nvc0_context(pipe)->dirty |= NVC0_NEW_TFB; + struct nvc0_so_target *targ = nvc0_so_target(ptarg); + pipe->destroy_query(pipe, targ->pq); + FREE(targ); } static void -nvc0_set_transform_feedback_buffers(struct pipe_context *pipe, - struct pipe_resource **buffers, - int *offsets, - int num_buffers) +nvc0_set_transform_feedback_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_mask) { struct nvc0_context *nvc0 = nvc0_context(pipe); - int i; + unsigned i; + boolean serialize = TRUE; - assert(num_buffers >= 0 && num_buffers <= 4); /* why signed ? */ + assert(num_targets <= 4); - for (i = 0; i < num_buffers; ++i) { - assert(offsets[i] >= 0); - nvc0->tfb_offset[i] = offsets[i]; - pipe_resource_reference(&nvc0->tfbbuf[i], buffers[i]); - } - for (; i < nvc0->num_tfbbufs; ++i) - pipe_resource_reference(&nvc0->tfbbuf[i], NULL); + for (i = 0; i < num_targets; ++i) { + if (nvc0->tfbbuf[i] == targets[i] && (append_mask & (1 << i))) + continue; + nvc0->tfbbuf_dirty |= 1 << i; - nvc0->num_tfbbufs = num_buffers; + if (nvc0->tfbbuf[i] && nvc0->tfbbuf[i] != targets[i]) + nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize); + + if (targets[i] && !(append_mask & (1 << i))) + nvc0_so_target(targets[i])->clean = TRUE; + + pipe_so_target_reference(&nvc0->tfbbuf[i], targets[i]); + } + for (; i < nvc0->num_tfbbufs; ++i) { + nvc0->tfbbuf_dirty |= 1 << i; + nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize); + pipe_so_target_reference(&nvc0->tfbbuf[i], NULL); + } + nvc0->num_tfbbufs = num_targets; - nvc0->dirty |= NVC0_NEW_TFB_BUFFERS; + if (nvc0->tfbbuf_dirty) + nvc0->dirty |= NVC0_NEW_TFB_TARGETS; } void @@ -871,17 +879,9 @@ nvc0_init_state_functions(struct nvc0_context *nvc0) pipe->set_vertex_buffers = nvc0_set_vertex_buffers; pipe->set_index_buffer = nvc0_set_index_buffer; -#if 0 - pipe->create_stream_output_state = nvc0_tfb_state_create; - pipe->delete_stream_output_state = nvc0_tfb_state_delete; - pipe->bind_stream_output_state = nvc0_tfb_state_bind; - pipe->set_stream_output_buffers = nvc0_set_transform_feedback_buffers; -#else - (void)nvc0_tfb_state_create; - (void)nvc0_tfb_state_delete; - (void)nvc0_tfb_state_bind; - (void)nvc0_set_transform_feedback_buffers; -#endif + pipe->create_stream_output_target = nvc0_so_target_create; + pipe->stream_output_target_destroy = nvc0_so_target_destroy; + pipe->set_stream_output_targets = nvc0_set_transform_feedback_targets; pipe->redefine_user_buffer = u_default_redefine_user_buffer; } diff --git a/src/gallium/drivers/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nvc0/nvc0_state_validate.c index 1ec95b7f8b5..0dc822a2f15 100644 --- a/src/gallium/drivers/nvc0/nvc0_state_validate.c +++ b/src/gallium/drivers/nvc0/nvc0_state_validate.c @@ -428,6 +428,7 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0) { struct nouveau_channel *chan = nvc0->screen->base.channel; boolean early_z; + boolean rasterizer_discard; early_z = nvc0->fragprog->fp.early_z && !nvc0->zsa->pipe.alpha.enabled; @@ -435,6 +436,16 @@ nvc0_validate_derived_1(struct nvc0_context *nvc0) nvc0->state.early_z = early_z; IMMED_RING(chan, RING_3D(EARLY_FRAGMENT_TESTS), early_z); } + + rasterizer_discard = (!nvc0->fragprog || !nvc0->fragprog->hdr[18]) && + !nvc0->zsa->pipe.depth.enabled && !nvc0->zsa->pipe.stencil[0].enabled; + rasterizer_discard = rasterizer_discard || + nvc0->rast->pipe.rasterizer_discard; + + if (rasterizer_discard != nvc0->state.rasterizer_discard) { + nvc0->state.rasterizer_discard = rasterizer_discard; + IMMED_RING(chan, RING_3D(RASTERIZE_ENABLE), !rasterizer_discard); + } } static void @@ -484,13 +495,14 @@ static struct state_validate { { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG }, { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG }, { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG }, - { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA }, + { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA | + NVC0_NEW_RASTERIZER }, { nvc0_validate_clip, NVC0_NEW_CLIP }, { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF }, { nvc0_validate_textures, NVC0_NEW_TEXTURES }, { nvc0_validate_samplers, NVC0_NEW_SAMPLERS }, { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS }, - { nvc0_tfb_validate, NVC0_NEW_TFB | NVC0_NEW_TFB_BUFFERS } + { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nvc0/nvc0_stateobj.h index b508000bdab..5c0d0c1149b 100644 --- a/src/gallium/drivers/nvc0/nvc0_stateobj.h +++ b/src/gallium/drivers/nvc0/nvc0_stateobj.h @@ -50,11 +50,17 @@ struct nvc0_vertex_stateobj { struct nvc0_vertex_element element[0]; }; -/* will have to lookup index -> location qualifier from nvc0_program */ -struct nvc0_transform_feedback_state { - uint32_t stride[4]; - uint8_t varying_count[4]; - uint8_t varying_index[0]; +struct nvc0_so_target { + struct pipe_stream_output_target pipe; + struct pipe_query *pq; + unsigned stride; + boolean clean; }; +static INLINE struct nvc0_so_target * +nvc0_so_target(struct pipe_stream_output_target *ptarg) +{ + return (struct nvc0_so_target *)ptarg; +} + #endif diff --git a/src/gallium/drivers/nvc0/nvc0_surface.c b/src/gallium/drivers/nvc0/nvc0_surface.c index f807535f046..a2e1a853995 100644 --- a/src/gallium/drivers/nvc0/nvc0_surface.c +++ b/src/gallium/drivers/nvc0/nvc0_surface.c @@ -744,7 +744,8 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit) IMMED_RING(chan, RING_3D(STENCIL_ENABLE), 0); IMMED_RING(chan, RING_3D(ALPHA_TEST_ENABLE), 0); - /* transform feedback ? */ + /* disable transform feedback */ + IMMED_RING(chan, RING_3D(TFB_ENABLE), 0); } static void @@ -830,7 +831,8 @@ nvc0_blitctx_post_blit(struct nvc0_context *nvc0, struct nvc0_blitctx *blit) NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND | NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS | NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG | - NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG); + NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG | + NVC0_NEW_TFB_TARGETS); } static void diff --git a/src/gallium/drivers/nvc0/nvc0_vbo.c b/src/gallium/drivers/nvc0/nvc0_vbo.c index 50e99ac5df3..7cf69910e6a 100644 --- a/src/gallium/drivers/nvc0/nvc0_vbo.c +++ b/src/gallium/drivers/nvc0/nvc0_vbo.c @@ -569,6 +569,38 @@ nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten, } } +static void +nvc0_draw_stream_output(struct nvc0_context *nvc0, + const struct pipe_draw_info *info) +{ + struct nouveau_channel *chan = nvc0->screen->base.channel; + struct nvc0_so_target *so = nvc0_so_target(info->count_from_stream_output); + struct nv04_resource *res = nv04_resource(so->pipe.buffer); + unsigned mode = nvc0_prim_gl(info->mode); + unsigned num_instances = info->instance_count; + + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + IMMED_RING(chan, RING_3D(SERIALIZE), 0); + nvc0_query_fifo_wait(chan, so->pq); + IMMED_RING(chan, RING_3D(VERTEX_ARRAY_FLUSH), 0); + } + + while (num_instances--) { + BEGIN_RING(chan, RING_3D(VERTEX_BEGIN_GL), 1); + OUT_RING (chan, mode); + BEGIN_RING(chan, RING_3D(DRAW_TFB_BASE), 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, RING_3D(DRAW_TFB_STRIDE), 1); + OUT_RING (chan, so->stride); + BEGIN_RING(chan, RING_3D(DRAW_TFB_BYTES), 1); + nvc0_query_pushbuf_submit(chan, so->pq, 0x4); + IMMED_RING(chan, RING_3D(VERTEX_END_GL), 0); + + mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } +} + void nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { @@ -615,6 +647,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nvc0->base.vbo_dirty = FALSE; } + if (unlikely(info->count_from_stream_output)) { + nvc0_draw_stream_output(nvc0, info); + } else if (!info->indexed) { nvc0_draw_arrays(nvc0, info->mode, info->start, info->count, |