From c76462b45f1e3a0aa2ee7971191e30e8a5f52015 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Fri, 30 Mar 2012 23:52:45 +0200 Subject: r600g: rework state emission of vertex buffers This reduces CPU overhead a little. The idea is to translate pipe vertex buffers directly into the CS without using any intermediate representations. Framerate in Torcs: before: 32.2 after: 34.6 Reviewed-by: Alex Deucher --- src/gallium/drivers/r600/evergreen_hw_context.c | 4 -- src/gallium/drivers/r600/evergreen_state.c | 43 +++++++++++++++++++ src/gallium/drivers/r600/r600.h | 1 - src/gallium/drivers/r600/r600_hw_context.c | 14 +------ src/gallium/drivers/r600/r600_pipe.h | 7 ++-- src/gallium/drivers/r600/r600_state.c | 35 ++++++++++++++++ src/gallium/drivers/r600/r600_state_common.c | 56 +++++-------------------- 7 files changed, 93 insertions(+), 67 deletions(-) (limited to 'src/gallium/drivers/r600') diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 0d2228d3885..010b5f3f475 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -718,14 +718,10 @@ int evergreen_context_init(struct r600_context *ctx) ctx->num_ps_resources = 176; ctx->num_vs_resources = 160; - ctx->num_fs_resources = 16; r = evergreen_resource_range_init(ctx, &ctx->ps_resources, 0, 176, 0x20); if (r) goto out_err; r = evergreen_resource_range_init(ctx, &ctx->vs_resources, 0x1600, 160, 0x20); - if (r) - goto out_err; - r = evergreen_resource_range_init(ctx, &ctx->fs_resources, 0x7C00, 16, 0x20); if (r) goto out_err; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 04844e82395..00d2d0d6163 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1721,10 +1721,53 @@ static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_ r600_write_context_reg(cs, 
R_02800C_DB_RENDER_OVERRIDE, db_render_override); } +static void evergreen_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) +{ /* Atom emit callback: writes each bound real vertex buffer into the CS as one SET_RESOURCE packet followed by a relocation NOP (12 dwords per emitted buffer). 'atom' is not read. */ + struct radeon_winsys_cs *cs = rctx->cs; + struct pipe_vertex_buffer *vb = rctx->vbuf_mgr->real_vertex_buffer; + unsigned count = rctx->vbuf_mgr->nr_real_vertex_buffers; + unsigned i; + uint64_t va; + + for (i = 0; i < count; i++) { + struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer; + + if (!rbuffer) { /* empty slot: no packet is written for it */ + continue; + } + + va = r600_resource_va(&rctx->screen->screen, &rbuffer->b.b.b); + va += vb[i].buffer_offset; /* skip to the offset bound for this vertex buffer */ + + /* fetch resources start at index 992; each resource is 8 dwords wide */ + r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0)); + r600_write_value(cs, (992 + i) * 8); + r600_write_value(cs, va); /* RESOURCEi_WORD0 (the bits of va above 32 go into WORD2) */ + r600_write_value(cs, rbuffer->buf->size - vb[i].buffer_offset - 1); /* RESOURCEi_WORD1: bytes remaining after the offset, minus one */ + r600_write_value(cs, /* RESOURCEi_WORD2 */ + S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | + S_030008_STRIDE(vb[i].stride) | + S_030008_BASE_ADDRESS_HI(va >> 32UL)); + r600_write_value(cs, /* RESOURCEi_WORD3: identity X/Y/Z/W destination selects */ + S_03000C_DST_SEL_X(V_03000C_SQ_SEL_X) | + S_03000C_DST_SEL_Y(V_03000C_SQ_SEL_Y) | + S_03000C_DST_SEL_Z(V_03000C_SQ_SEL_Z) | + S_03000C_DST_SEL_W(V_03000C_SQ_SEL_W)); + r600_write_value(cs, 0); /* RESOURCEi_WORD4 */ + r600_write_value(cs, 0); /* RESOURCEi_WORD5 */ + r600_write_value(cs, 0); /* RESOURCEi_WORD6 */ + r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 -- NOTE(review): presumably the resource TYPE field marking a valid buffer; confirm against the register headers */ + + r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* NOP packet whose single payload dword is the relocation below */ + r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ)); + } +} + void evergreen_init_state_functions(struct r600_context *rctx) { r600_init_atom(&rctx->db_misc_state.atom, evergreen_emit_db_misc_state, 6, 0); r600_atom_dirty(rctx, &rctx->db_misc_state.atom); + r600_init_atom(&rctx->vertex_buffer_state, evergreen_emit_vertex_buffers, 0, 0); rctx->context.create_blend_state = evergreen_create_blend_state; rctx->context.create_depth_stencil_alpha_state = 
evergreen_create_dsa_state; diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h index 60b5694a6df..867d9d56838 100644 --- a/src/gallium/drivers/r600/r600.h +++ b/src/gallium/drivers/r600/r600.h @@ -202,7 +202,6 @@ void r600_context_fini(struct r600_context *ctx); void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state); void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); -void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid); void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id); void r600_context_flush(struct r600_context *ctx, unsigned flags); diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 5489ccee32c..29e4d23ba38 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -660,7 +660,6 @@ void r600_context_fini(struct r600_context *ctx) } r600_free_resource_range(ctx, &ctx->ps_resources, ctx->num_ps_resources); r600_free_resource_range(ctx, &ctx->vs_resources, ctx->num_vs_resources); - r600_free_resource_range(ctx, &ctx->fs_resources, ctx->num_fs_resources); free(ctx->blocks); } @@ -707,7 +706,6 @@ int r600_setup_block_table(struct r600_context *ctx) r600_add_resource_block(ctx, &ctx->ps_resources, ctx->num_ps_resources, &c); r600_add_resource_block(ctx, &ctx->vs_resources, ctx->num_vs_resources, &c); - r600_add_resource_block(ctx, &ctx->fs_resources, ctx->num_fs_resources, &c); return 0; } @@ -757,14 +755,10 @@ int r600_context_init(struct r600_context *ctx) 
ctx->num_ps_resources = 160; ctx->num_vs_resources = 160; - ctx->num_fs_resources = 16; r = r600_resource_range_init(ctx, &ctx->ps_resources, 0, 160, 0x1c); if (r) goto out_err; r = r600_resource_range_init(ctx, &ctx->vs_resources, 0x1180, 160, 0x1c); - if (r) - goto out_err; - r = r600_resource_range_init(ctx, &ctx->fs_resources, 0x2300, 16, 0x1c); if (r) goto out_err; @@ -977,13 +971,6 @@ void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r6 r600_context_pipe_state_set_resource(ctx, state, block); } -void r600_context_pipe_state_set_fs_resource(struct r600_context *ctx, struct r600_pipe_resource_state *state, unsigned rid) -{ - struct r600_block *block = ctx->fs_resources.blocks[rid]; - - r600_context_pipe_state_set_resource(ctx, state, block); -} - void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) { struct r600_range *range; @@ -1246,6 +1233,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) r600_emit_atom(ctx, &ctx->start_cs_cmd.atom); r600_atom_dirty(ctx, &ctx->db_misc_state.atom); + r600_atom_dirty(ctx, &ctx->vertex_buffer_state); if (streamout_suspended) { ctx->streamout_start = TRUE; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 96df79b9bf3..6de33cb2a34 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -237,7 +237,6 @@ struct r600_context { struct radeon_winsys *ws; struct r600_pipe_state *states[R600_PIPE_NSTATES]; struct r600_vertex_element *vertex_elements; - struct r600_pipe_resource_state fs_resource[PIPE_MAX_ATTRIBS]; struct pipe_framebuffer_state framebuffer; unsigned cb_target_mask; unsigned cb_color_control; @@ -282,6 +281,7 @@ struct r600_context { struct r600_surface_sync_cmd surface_sync_cmd; struct r600_atom r6xx_flush_and_inv_cmd; struct r600_db_misc_state db_misc_state; + struct r600_atom vertex_buffer_state; /* Below are variables from 
the old r600_context. */ @@ -318,8 +318,7 @@ struct r600_context { boolean predicate_drawing; struct r600_range ps_resources; struct r600_range vs_resources; - struct r600_range fs_resources; - int num_ps_resources, num_vs_resources, num_fs_resources; + int num_ps_resources, num_vs_resources; unsigned num_so_targets; struct r600_so_target *so_targets[PIPE_MAX_SO_BUFFERS]; @@ -334,6 +333,8 @@ struct r600_context { /* With rasterizer discard, there doesn't have to be a pixel shader. * In that case, we bind this one: */ void *dummy_pixel_shader; + + bool vertex_buffers_dirty; }; static INLINE void r600_emit_atom(struct r600_context *rctx, struct r600_atom *atom) diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 016434af493..96df9cb20cf 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -1699,10 +1699,45 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom r600_write_value(cs, db_render_override); /* R_028D10_DB_RENDER_OVERRIDE */ } +static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) +{ /* Atom emit callback: writes each bound real vertex buffer into the CS as one SET_RESOURCE packet followed by a relocation NOP (11 dwords per emitted buffer). 'atom' is not read. */ + struct radeon_winsys_cs *cs = rctx->cs; + struct pipe_vertex_buffer *vb = rctx->vbuf_mgr->real_vertex_buffer; + unsigned count = rctx->vbuf_mgr->nr_real_vertex_buffers; + unsigned i, offset; + + for (i = 0; i < count; i++) { + struct r600_resource *rbuffer = (struct r600_resource*)vb[i].buffer; + + if (!rbuffer) { /* empty slot: no packet is written for it */ + continue; + } + + offset = vb[i].buffer_offset; + + /* fetch resources start at index 320; each resource is 7 dwords wide */ + r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); + r600_write_value(cs, (320 + i) * 7); + r600_write_value(cs, offset); /* RESOURCEi_WORD0: byte offset only -- NOTE(review): presumably the relocation below supplies the buffer base; confirm */ + r600_write_value(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1: bytes remaining after the offset, minus one */ + r600_write_value(cs, /* RESOURCEi_WORD2 */ + S_038008_ENDIAN_SWAP(r600_endian_swap(32)) | + S_038008_STRIDE(vb[i].stride)); + r600_write_value(cs, 0); /* RESOURCEi_WORD3 */ + r600_write_value(cs, 
0); /* RESOURCEi_WORD4 */ + r600_write_value(cs, 0); /* RESOURCEi_WORD5 */ + r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 -- NOTE(review): presumably the resource TYPE field marking a valid buffer; confirm against the register headers */ + + r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* NOP packet whose single payload dword is the relocation below */ + r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ)); + } +} + void r600_init_state_functions(struct r600_context *rctx) { r600_init_atom(&rctx->db_misc_state.atom, r600_emit_db_misc_state, 4, 0); r600_atom_dirty(rctx, &rctx->db_misc_state.atom); + r600_init_atom(&rctx->vertex_buffer_state, r600_emit_vertex_buffers, 0, 0); rctx->context.create_blend_state = r600_create_blend_state; rctx->context.create_depth_stencil_alpha_state = r600_create_dsa_state; diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index fe0465e8caa..61f59f7e904 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -396,19 +396,9 @@ void r600_set_vertex_buffers(struct pipe_context *ctx, unsigned count, const struct pipe_vertex_buffer *buffers) { struct r600_context *rctx = (struct r600_context *)ctx; - int i; - - /* Zero states. 
*/ - for (i = 0; i < count; i++) { - if (!buffers[i].buffer) { - r600_context_pipe_state_set_fs_resource(rctx, NULL, i); - } - } - for (; i < rctx->vbuf_mgr->nr_real_vertex_buffers; i++) { - r600_context_pipe_state_set_fs_resource(rctx, NULL, i); - } u_vbuf_set_vertex_buffers(rctx->vbuf_mgr, count, buffers); + rctx->vertex_buffers_dirty = true; } void *r600_create_vertex_elements(struct pipe_context *ctx, @@ -680,39 +670,6 @@ void r600_set_so_targets(struct pipe_context *ctx, rctx->streamout_append_bitmask = append_bitmask; } -static void r600_vertex_buffer_update(struct r600_context *rctx) -{ - unsigned i, count; - - r600_inval_vertex_cache(rctx); - - count = rctx->vbuf_mgr->nr_real_vertex_buffers; - - for (i = 0 ; i < count; i++) { - struct r600_pipe_resource_state *rstate = &rctx->fs_resource[i]; - struct pipe_vertex_buffer *vb = &rctx->vbuf_mgr->real_vertex_buffer[i]; - - if (!vb->buffer) { - continue; - } - - if (!rstate->id) { - if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_init_buffer_resource(rctx, rstate); - } else { - r600_pipe_init_buffer_resource(rctx, rstate); - } - } - - if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(&rctx->context, rstate, (struct r600_resource*)vb->buffer, vb->buffer_offset, vb->stride, RADEON_USAGE_READ); - } else { - r600_pipe_mod_buffer_resource(rstate, (struct r600_resource*)vb->buffer, vb->buffer_offset, vb->stride, RADEON_USAGE_READ); - } - r600_context_pipe_state_set_fs_resource(rctx, rstate, i); - } -} - static int r600_shader_rebuild(struct pipe_context * ctx, struct r600_pipe_shader * shader) { struct r600_context *rctx = (struct r600_context *)ctx; @@ -813,8 +770,15 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) r600_update_derived_state(rctx); - u_vbuf_draw_begin(rctx->vbuf_mgr, &info); - r600_vertex_buffer_update(rctx); + /* Update vertex buffers. 
*/ + if ((u_vbuf_draw_begin(rctx->vbuf_mgr, &info) & U_VBUF_BUFFERS_UPDATED) || + rctx->vertex_buffers_dirty) { /* re-emit only when u_vbuf reports updated buffers or new vertex buffers were set */ + r600_inval_vertex_cache(rctx); + rctx->vertex_buffer_state.num_dw = (rctx->chip_class >= EVERGREEN ? 12 : 11) * /* dwords per buffer: SET_RESOURCE header + resource index + 8 (Evergreen) or 7 (R600/R700) resource words + 2-dword relocation NOP; counts every slot, so empty slots only over-reserve */ + rctx->vbuf_mgr->nr_real_vertex_buffers; + r600_atom_dirty(rctx, &rctx->vertex_buffer_state); + rctx->vertex_buffers_dirty = false; + } if (info.indexed) { /* Initialize the index buffer struct. */ -- cgit v1.2.3