diff options
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute.c | 30 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_compute_internal.c | 42 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_hw_context.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r600/evergreen_state.c | 56 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_asm.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_buffer.c | 17 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_hw_context.c | 51 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.c | 142 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_pipe.h | 40 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_query.c | 24 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_shader.c | 2 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state.c | 48 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_state_common.c | 29 | ||||
-rw-r--r-- | src/gallium/drivers/r600/r600_texture.c | 15 |
14 files changed, 327 insertions, 176 deletions
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index ed5055b950f..f4a790585b1 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -210,8 +210,7 @@ void evergreen_compute_upload_input( ctx->screen, buffer_size); } - num_work_groups_start = ctx->ws->buffer_map( - shader->kernel_param->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE); + num_work_groups_start = r600_buffer_mmap_sync_with_rings(ctx, shader->kernel_param, PIPE_TRANSFER_WRITE); global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4)); local_size_start = global_size_start + (3 * (sizeof(uint)) / 4); kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4); @@ -251,7 +250,7 @@ static void evergreen_emit_direct_dispatch( const uint *block_layout, const uint *grid_layout) { int i; - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; unsigned num_waves; unsigned num_pipes = rctx->screen->info.r600_max_pipes; unsigned wave_divisor = (16 * num_pipes); @@ -314,20 +313,22 @@ static void evergreen_emit_direct_dispatch( static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, const uint *grid_layout) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; unsigned flush_flags = 0; int i; - struct r600_resource *onebo = NULL; struct evergreen_compute_resource *resources = ctx->cs_shader_state.shader->resources; + /* make sure that the gfx ring is only one active */ + ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC); + /* Initialize all the compute-related registers. * * See evergreen_init_atom_start_compute_cs() in this file for the list * of registers initialized by the start_compute_cs_cmd atom. */ - r600_emit_command_buffer(ctx->cs, &ctx->start_compute_cs_cmd); + r600_emit_command_buffer(cs, &ctx->start_compute_cs_cmd); ctx->flags |= R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_FLUSH_AND_INV; r600_flush_emit(ctx); @@ -335,7 +336,8 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, /* Emit colorbuffers. */ for (i = 0; i < ctx->framebuffer.state.nr_cbufs; i++) { struct r600_surface *cb = (struct r600_surface*)ctx->framebuffer.state.cbufs[i]; - unsigned reloc = r600_context_bo_reloc(ctx, (struct r600_resource*)cb->base.texture, + unsigned reloc = r600_context_bo_reloc(ctx, &ctx->rings.gfx, + (struct r600_resource*)cb->base.texture, RADEON_USAGE_READWRITE); r600_write_compute_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 7); @@ -424,7 +426,7 @@ static void compute_emit_cs(struct r600_context *ctx, const uint *block_layout, flush_flags |= RADEON_FLUSH_KEEP_TILING_FLAGS; } - ctx->ws->cs_flush(ctx->cs, flush_flags); + ctx->ws->cs_flush(ctx->rings.gfx.cs, flush_flags); ctx->pm4_dirty_cdwords = 0; ctx->flags = 0; @@ -452,7 +454,7 @@ void evergreen_emit_cs_shader( (struct r600_cs_shader_state*)atom; struct r600_pipe_compute *shader = state->shader; struct r600_kernel *kernel = &shader->kernels[state->kernel_index]; - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint64_t va; va = r600_resource_va(&rctx->screen->screen, &kernel->code_bo->b.b); @@ -465,8 +467,8 @@ void evergreen_emit_cs_shader( r600_write_value(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */ r600_write_value(cs, PKT3C(PKT3_NOP, 0, 0)); - r600_write_value(cs, r600_context_bo_reloc(rctx, kernel->code_bo, - RADEON_USAGE_READ)); + r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, + kernel->code_bo, RADEON_USAGE_READ)); rctx->flags |= R600_CONTEXT_INVAL_READ_CACHES; } @@ -488,8 +490,7 @@ static void evergreen_launch_grid( r600_compute_shader_create(ctx_, kernel->llvm_module, &kernel->bc); kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen, kernel->bc.ndw * 4); - p = ctx->ws->buffer_map(kernel->code_bo->cs_buf, ctx->cs, - PIPE_TRANSFER_WRITE); + p = r600_buffer_mmap_sync_with_rings(ctx, kernel->code_bo, PIPE_TRANSFER_WRITE); memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4); ctx->ws->buffer_unmap(kernel->code_bo->cs_buf); } @@ -901,8 +902,7 @@ void *r600_compute_global_transfer_map( COMPUTE_DBG("* r600_compute_global_transfer_map()\n"); - if (!(map = rctx->ws->buffer_map(buffer->chunk->pool->bo->cs_buf, - rctx->cs, transfer->usage))) { + if (!(map = r600_buffer_mmap_sync_with_rings(rctx, buffer->chunk->pool->bo, transfer->usage))) { util_slab_free(&rctx->pool_transfers, transfer); return NULL; } diff --git a/src/gallium/drivers/r600/evergreen_compute_internal.c b/src/gallium/drivers/r600/evergreen_compute_internal.c index 1654ab05458..2e8e9daca62 100644 --- a/src/gallium/drivers/r600/evergreen_compute_internal.c +++ b/src/gallium/drivers/r600/evergreen_compute_internal.c @@ -63,7 +63,9 @@ void evergreen_emit_raw_value( void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value) { - ctx->cs->buf[ctx->cs->cdw++] = value; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; + + cs->buf[cs->cdw++] = value; } void evergreen_mult_reg_set_( @@ -178,37 +180,38 @@ void evergreen_emit_ctx_reg_set( unsigned index, int num) { + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; if (index >= EVERGREEN_CONFIG_REG_OFFSET && index < EVERGREEN_CONFIG_REG_END) { - ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0); - ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2; + cs->buf[cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0); + cs->buf[cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2; } else if (index >= EVERGREEN_CONTEXT_REG_OFFSET && index < EVERGREEN_CONTEXT_REG_END) { - ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0); - ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; + cs->buf[cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0); + cs->buf[cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2; } else if (index >= EVERGREEN_RESOURCE_OFFSET && index < EVERGREEN_RESOURCE_END) { - ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0); - ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2; + cs->buf[cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0); + cs->buf[cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2; } else if (index >= EVERGREEN_SAMPLER_OFFSET && index < EVERGREEN_SAMPLER_END) { - ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0); - ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2; + cs->buf[cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0); + cs->buf[cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2; } else if (index >= EVERGREEN_CTL_CONST_OFFSET && index < EVERGREEN_CTL_CONST_END) { - ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0); - ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2; + cs->buf[cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0); + cs->buf[cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2; } else if (index >= EVERGREEN_LOOP_CONST_OFFSET && index < EVERGREEN_LOOP_CONST_END) { - ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0); - ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2; + cs->buf[cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0); + cs->buf[cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2; } else if (index >= EVERGREEN_BOOL_CONST_OFFSET && index < EVERGREEN_BOOL_CONST_END) { - ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0); - ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2; + cs->buf[cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0); + cs->buf[cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2; } else { - ctx->cs->buf[ctx->cs->cdw++] = PKT0(index, num-1); + cs->buf[cs->cdw++] = PKT0(index, num-1); } } @@ -217,13 +220,14 @@ void evergreen_emit_ctx_reloc( struct r600_resource *bo, enum radeon_bo_usage usage) { + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; u32 rr = 0; assert(bo); - ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - rr = r600_context_bo_reloc(ctx, bo, usage); - ctx->cs->buf[ctx->cs->cdw++] = rr; + cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); + rr = r600_context_bo_reloc(ctx, &ctx->rings.gfx, bo, usage); + cs->buf[cs->cdw++] = rr; } int evergreen_compute_get_gpu_format( diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index 0ca7f9e0512..fa90c9a9b74 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -210,7 +210,7 @@ out_err: void evergreen_flush_vgt_streamout(struct r600_context *ctx) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; r600_write_config_reg(cs, R_0084FC_CP_STRMOUT_CNTL, 0); @@ -228,7 +228,7 @@ void evergreen_flush_vgt_streamout(struct r600_context *ctx) void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; if (buffer_enable_bit) { r600_write_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index ac85fb43659..d03c376f0fc 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1202,7 +1202,7 @@ evergreen_create_sampler_view(struct pipe_context *ctx, static void evergreen_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct pipe_clip_state *state = &rctx->clip_state.state; r600_write_context_reg_seq(cs, R_0285BC_PA_CL_UCP0_X, 6*4); @@ -1245,7 +1245,7 @@ static void evergreen_set_scissor_state(struct pipe_context *ctx, static void evergreen_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct pipe_scissor_state *state = &rctx->scissor.scissor; uint32_t tl, br; @@ -1811,7 +1811,7 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples) }; static unsigned max_dist_8x = 7; - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; unsigned max_dist = 0; switch (nr_samples) { @@ -1901,7 +1901,7 @@ static void cayman_emit_msaa_state(struct r600_context *rctx, int nr_samples) }; static unsigned max_dist_16x = 8; - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; unsigned max_dist = 0; switch (nr_samples) { @@ -1992,7 +1992,7 @@ static void cayman_emit_msaa_state(struct r600_context *rctx, int nr_samples) static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct pipe_framebuffer_state *state = &rctx->framebuffer.state; unsigned nr_cbufs = state->nr_cbufs; unsigned i, tl, br; @@ -2005,7 +2005,9 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r /* Colorbuffers. */ for (i = 0; i < nr_cbufs; i++) { struct r600_surface *cb = (struct r600_surface*)state->cbufs[i]; - unsigned reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)cb->base.texture, + unsigned reloc = r600_context_bo_reloc(rctx, + &rctx->rings.gfx, + (struct r600_resource*)cb->base.texture, RADEON_USAGE_READWRITE); r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 11); @@ -2044,7 +2046,9 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r ((struct r600_surface*)state->cbufs[0])->cb_color_info); if (!rctx->keep_tiling_flags) { - unsigned reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)state->cbufs[0]->texture, + unsigned reloc = r600_context_bo_reloc(rctx, + &rctx->rings.gfx, + (struct r600_resource*)state->cbufs[0]->texture, RADEON_USAGE_READWRITE); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); /* R_028C70_CB_COLOR0_INFO */ @@ -2064,7 +2068,9 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r /* ZS buffer. */ if (state->zsbuf) { struct r600_surface *zb = (struct r600_surface*)state->zsbuf; - unsigned reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)state->zsbuf->texture, + unsigned reloc = r600_context_bo_reloc(rctx, + &rctx->rings.gfx, + (struct r600_resource*)state->zsbuf->texture, RADEON_USAGE_READWRITE); r600_write_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, @@ -2121,7 +2127,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; float offset_units = state->offset_units; float offset_scale = state->offset_scale; @@ -2146,7 +2152,7 @@ static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600 static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1; unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1; @@ -2161,7 +2167,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_db_state *a = (struct r600_db_state*)atom; if (a->rsurf && a->rsurf->htile_enabled) { @@ -2172,7 +2178,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom r600_write_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); r600_write_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control); r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); - reloc_idx = r600_context_bo_reloc(rctx, rtex->htile, RADEON_USAGE_READWRITE); + reloc_idx = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rtex->htile, RADEON_USAGE_READWRITE); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = reloc_idx; } else { @@ -2183,7 +2189,7 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; unsigned db_render_control = 0; unsigned db_count_control = 0; @@ -2233,7 +2239,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, unsigned resource_offset, unsigned pkt_flags) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -2269,7 +2275,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx, r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags); - r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ)); + r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ)); } state->dirty_mask = 0; } @@ -2291,7 +2297,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, unsigned reg_alu_constbuf_size, unsigned reg_alu_const_cache) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -2312,7 +2318,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, va >> 8); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); - r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ)); + r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ)); r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 8, 0)); r600_write_value(cs, (buffer_id_base + buffer_index) * 8); @@ -2333,7 +2339,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx, r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD7 */ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); - r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ)); + r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ)); dirty_mask &= ~(1 << buffer_index); } @@ -2365,7 +2371,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, unsigned resource_id_base) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -2380,7 +2386,7 @@ static void evergreen_emit_sampler_views(struct r600_context *rctx, r600_write_value(cs, (resource_id_base + resource_index) * 8); r600_write_array(cs, 8, rview->tex_resource_words); - reloc = r600_context_bo_reloc(rctx, rview->tex_resource, + reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rview->tex_resource, RADEON_USAGE_READ); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); r600_write_value(cs, reloc); @@ -2413,7 +2419,7 @@ static void evergreen_emit_sampler_states(struct r600_context *rctx, unsigned resource_id_base, unsigned border_index_reg) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint32_t dirty_mask = texinfo->states.dirty_mask; while (dirty_mask) { @@ -2456,14 +2462,14 @@ static void evergreen_emit_sample_mask(struct r600_context *rctx, struct r600_at struct r600_sample_mask *s = (struct r600_sample_mask*)a; uint8_t mask = s->sample_mask; - r600_write_context_reg(rctx->cs, R_028C3C_PA_SC_AA_MASK, + r600_write_context_reg(rctx->rings.gfx.cs, R_028C3C_PA_SC_AA_MASK, mask | (mask << 8) | (mask << 16) | (mask << 24)); } static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a) { struct r600_sample_mask *s = (struct r600_sample_mask*)a; - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint16_t mask = s->sample_mask; r600_write_context_reg_seq(cs, CM_R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); @@ -2473,14 +2479,14 @@ static void cayman_emit_sample_mask(struct r600_context *rctx, struct r600_atom static void evergreen_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_cso_state *state = (struct r600_cso_state*)a; struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; r600_write_context_reg(cs, R_0288A4_SQ_PGM_START_FS, (r600_resource_va(rctx->context.screen, &shader->buffer->b.b) + shader->offset) >> 8); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); - r600_write_value(cs, r600_context_bo_reloc(rctx, shader->buffer, RADEON_USAGE_READ)); + r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, shader->buffer, RADEON_USAGE_READ)); } void evergreen_init_state_functions(struct r600_context *rctx) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 3aaea4a73e9..07fe1df865b 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -2890,8 +2890,7 @@ void *r600_create_vertex_fetch_shader(struct pipe_context *ctx, return NULL; } - bytecode = rctx->ws->buffer_map(shader->buffer->cs_buf, rctx->cs, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); + bytecode = r600_buffer_mmap_sync_with_rings(rctx, shader->buffer, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_UNSYNCHRONIZED); bytecode += shader->offset / 4; if (R600_BIG_ENDIAN) { diff --git a/src/gallium/drivers/r600/r600_buffer.c b/src/gallium/drivers/r600/r600_buffer.c index e674e13a144..be171f8850a 100644 --- a/src/gallium/drivers/r600/r600_buffer.c +++ b/src/gallium/drivers/r600/r600_buffer.c @@ -85,11 +85,11 @@ static void *r600_buffer_get_transfer(struct pipe_context *ctx, } static void *r600_buffer_transfer_map(struct pipe_context *ctx, - struct pipe_resource *resource, - unsigned level, - unsigned usage, - const struct pipe_box *box, - struct pipe_transfer **ptransfer) + struct pipe_resource *resource, + unsigned level, + unsigned usage, + const struct pipe_box *box, + struct pipe_transfer **ptransfer) { struct r600_context *rctx = (struct r600_context*)ctx; struct r600_resource *rbuffer = r600_resource(resource); @@ -102,7 +102,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, assert(usage & PIPE_TRANSFER_WRITE); /* Check if mapping this buffer would cause waiting for the GPU. */ - if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) || + if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) || rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) { unsigned i, mask; @@ -144,7 +144,7 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, assert(usage & PIPE_TRANSFER_WRITE); /* Check if mapping this buffer would cause waiting for the GPU. */ - if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rbuffer->cs_buf, RADEON_USAGE_READWRITE) || + if (r600_rings_is_buffer_referenced(rctx, rbuffer->cs_buf, RADEON_USAGE_READWRITE) || rctx->ws->buffer_is_busy(rbuffer->buf, RADEON_USAGE_READWRITE)) { /* Do a wait-free write-only transfer using a temporary buffer. */ unsigned offset; @@ -161,7 +161,8 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx, } } - data = rctx->ws->buffer_map(rbuffer->cs_buf, rctx->cs, usage); + /* mmap and synchronize with rings */ + data = r600_buffer_mmap_sync_with_rings(rctx, rbuffer, usage); if (!data) { return NULL; } diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 723a61f98ef..b28827f2b29 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -32,7 +32,7 @@ /* Get backends mask */ void r600_get_backend_mask(struct r600_context *ctx) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; struct r600_resource *buffer; uint32_t *results; unsigned num_backends = ctx->screen->info.r600_num_backends; @@ -72,11 +72,10 @@ void r600_get_backend_mask(struct r600_context *ctx) PIPE_USAGE_STAGING, ctx->max_db*16); if (!buffer) goto err; - va = r600_resource_va(&ctx->screen->screen, (void*)buffer); /* initialize buffer with zeroes */ - results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE); + results = r600_buffer_mmap_sync_with_rings(ctx, buffer, PIPE_TRANSFER_WRITE); if (results) { memset(results, 0, ctx->max_db * 4 * 4); ctx->ws->buffer_unmap(buffer->cs_buf); @@ -88,10 +87,10 @@ void r600_get_backend_mask(struct r600_context *ctx) cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE); + cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, buffer, RADEON_USAGE_WRITE); /* analyze results */ - results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_READ); + results = r600_buffer_mmap_sync_with_rings(ctx, buffer, PIPE_TRANSFER_READ); if (results) { for(i = 0; i < ctx->max_db; i++) { /* at least highest bit will be set if backend is used */ @@ -361,7 +360,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in) { /* The number of dwords we already used in the CS so far. */ - num_dw += ctx->cs->cdw; + num_dw += ctx->rings.gfx.cs->cdw; if (count_draw_in) { unsigned i; @@ -413,7 +412,7 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, /* Flush if there's not enough space. */ if (num_dw > RADEON_MAX_CMDBUF_DWORDS) { - r600_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC); + ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC); } } @@ -543,7 +542,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *block, unsigned pkt_flags) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; int optional = block->nbo == 0 && !(block->flags & REG_FLAG_DIRTY_ALWAYS); int cp_dwords = block->pm4_ndwords, start_dword = 0; int new_dwords = 0; @@ -560,7 +559,7 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block * struct r600_block_reloc *reloc = &block->reloc[block->pm4_bo_index[j]]; if (reloc->bo) { block->pm4[reloc->bo_pm4_index] = - r600_context_bo_reloc(ctx, reloc->bo, reloc->bo_usage); + r600_context_bo_reloc(ctx, &ctx->rings.gfx, reloc->bo, reloc->bo_usage); } else { block->pm4[reloc->bo_pm4_index] = 0; } @@ -604,7 +603,7 @@ out: void r600_flush_emit(struct r600_context *rctx) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; unsigned cp_coher_cntl = 0; unsigned wait_until = 0; unsigned emit_flush = 0; @@ -692,7 +691,7 @@ void r600_flush_emit(struct r600_context *rctx) void r600_context_flush(struct r600_context *ctx, unsigned flags) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; if (cs->cdw == ctx->start_cs_cmd.num_dw) return; @@ -743,7 +742,7 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags) rscreen->cs_count++; } #endif - ctx->ws->cs_flush(ctx->cs, flags); + ctx->ws->cs_flush(ctx->rings.gfx.cs, flags); #if R600_TRACE_CS if (ctx->screen->trace_bo) { struct r600_screen *rscreen = ctx->screen; @@ -776,7 +775,7 @@ void r600_begin_new_cs(struct r600_context *ctx) ctx->flags = 0; /* Begin a new CS. */ - r600_emit_command_buffer(ctx->cs, &ctx->start_cs_cmd); + r600_emit_command_buffer(ctx->rings.gfx.cs, &ctx->start_cs_cmd); /* Re-emit states. */ ctx->alphatest_state.atom.dirty = true; @@ -854,7 +853,7 @@ void r600_begin_new_cs(struct r600_context *ctx) void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; uint64_t va; r600_need_cs_space(ctx, 10, FALSE); @@ -872,12 +871,12 @@ void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fen cs->buf[cs->cdw++] = value; /* DATA_LO */ cs->buf[cs->cdw++] = 0; /* DATA_HI */ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE); + cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, fence_bo, RADEON_USAGE_WRITE); } static void r600_flush_vgt_streamout(struct r600_context *ctx) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; r600_write_config_reg(cs, R_008490_CP_STRMOUT_CNTL, 0); @@ -895,7 +894,7 @@ static void r600_flush_vgt_streamout(struct r600_context *ctx) static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; if (buffer_enable_bit) { r600_write_context_reg(cs, R_028AB0_VGT_STRMOUT_EN, S_028AB0_STREAMOUT(1)); @@ -907,7 +906,7 @@ static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_ void r600_context_streamout_begin(struct r600_context *ctx) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; struct r600_so_target **t = ctx->so_targets; unsigned *stride_in_dw = ctx->vs_shader->so.stride; unsigned buffer_en, i, update_flags = 0; @@ -963,7 +962,7 @@ void r600_context_streamout_begin(struct r600_context *ctx) cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = - r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer), + r600_context_bo_reloc(ctx, &ctx->rings.gfx, r600_resource(t[i]->b.buffer), RADEON_USAGE_WRITE); /* R7xx requires this packet after updating BUFFER_BASE. @@ -975,7 +974,7 @@ void r600_context_streamout_begin(struct r600_context *ctx) cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = - r600_context_bo_reloc(ctx, r600_resource(t[i]->b.buffer), + r600_context_bo_reloc(ctx, &ctx->rings.gfx, r600_resource(t[i]->b.buffer), RADEON_USAGE_WRITE); } @@ -993,7 +992,7 @@ void r600_context_streamout_begin(struct r600_context *ctx) cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = - r600_context_bo_reloc(ctx, t[i]->buf_filled_size, + r600_context_bo_reloc(ctx, &ctx->rings.gfx, t[i]->buf_filled_size, RADEON_USAGE_READ); } else { /* Start from the beginning. */ @@ -1016,7 +1015,7 @@ void r600_context_streamout_begin(struct r600_context *ctx) void r600_context_streamout_end(struct r600_context *ctx) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; struct r600_so_target **t = ctx->so_targets; unsigned i; uint64_t va; @@ -1042,7 +1041,7 @@ void r600_context_streamout_end(struct r600_context *ctx) cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = - r600_context_bo_reloc(ctx, t[i]->buf_filled_size, + r600_context_bo_reloc(ctx, &ctx->rings.gfx, t[i]->buf_filled_size, RADEON_USAGE_WRITE); } @@ -1069,7 +1068,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, struct pipe_resource *src, uint64_t src_offset, unsigned size) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; assert(size); assert(rctx->chip_class != R600); @@ -1110,8 +1109,8 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx, } /* This must be done after r600_need_cs_space. */ - src_reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)src, RADEON_USAGE_READ); - dst_reloc = r600_context_bo_reloc(rctx, (struct r600_resource*)dst, RADEON_USAGE_WRITE); + src_reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, (struct r600_resource*)src, RADEON_USAGE_READ); + dst_reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, (struct r600_resource*)dst, RADEON_USAGE_WRITE); r600_write_value(cs, PKT3(PKT3_CP_DMA, 4, 0)); r600_write_value(cs, src_offset); /* SRC_ADDR_LO [31:0] */ diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index e4a35cfe06c..c72ee8f8571 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -53,9 +53,7 @@ static struct r600_fence *r600_create_fence(struct r600_context *rctx) R600_ERR("r600: failed to create bo for fence objects\n"); goto out; } - rscreen->fences.data = rctx->ws->buffer_map(rscreen->fences.bo->cs_buf, - rctx->cs, - PIPE_TRANSFER_READ_WRITE); + rscreen->fences.data = r600_buffer_mmap_sync_with_rings(rctx, rscreen->fences.bo, PIPE_TRANSFER_READ_WRITE); } if (!LIST_IS_EMPTY(&rscreen->fences.pool)) { @@ -108,25 +106,20 @@ static struct r600_fence *r600_create_fence(struct r600_context *rctx) pipe_buffer_create(&rctx->screen->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STAGING, 1); /* Add the fence as a dummy relocation. */ - r600_context_bo_reloc(rctx, fence->sleep_bo, RADEON_USAGE_READWRITE); + r600_context_bo_reloc(rctx, &rctx->rings.gfx, fence->sleep_bo, RADEON_USAGE_READWRITE); out: pipe_mutex_unlock(rscreen->fences.mutex); return fence; } - -void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, - unsigned flags) +static void r600_flush(struct pipe_context *ctx, unsigned flags) { struct r600_context *rctx = (struct r600_context *)ctx; - struct r600_fence **rfence = (struct r600_fence**)fence; struct pipe_query *render_cond = NULL; unsigned render_cond_mode = 0; - if (rfence) - *rfence = r600_create_fence(rctx); - + rctx->rings.gfx.flushing = true; /* Disable render condition. */ if (rctx->current_render_cond) { render_cond = rctx->current_render_cond; @@ -140,19 +133,119 @@ void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, if (render_cond) { ctx->render_condition(ctx, render_cond, render_cond_mode); } + rctx->rings.gfx.flushing = false; } static void r600_flush_from_st(struct pipe_context *ctx, struct pipe_fence_handle **fence, enum pipe_flush_flags flags) { - r600_flush(ctx, fence, - flags & PIPE_FLUSH_END_OF_FRAME ? RADEON_FLUSH_END_OF_FRAME : 0); + struct r600_context *rctx = (struct r600_context *)ctx; + struct r600_fence **rfence = (struct r600_fence**)fence; + unsigned fflags; + + fflags = flags & PIPE_FLUSH_END_OF_FRAME ? RADEON_FLUSH_END_OF_FRAME : 0; + if (rfence) { + *rfence = r600_create_fence(rctx); + } + /* flush gfx & dma ring, order does not matter as only one can be live */ + rctx->rings.dma.flush(rctx, fflags); + rctx->rings.gfx.flush(rctx, fflags); +} + +static void r600_flush_gfx_ring(void *ctx, unsigned flags) +{ + r600_flush((struct pipe_context*)ctx, flags); +} + +static void r600_flush_dma_ring(void *ctx, unsigned flags) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + if (!rctx->rings.dma.cs->cdw) { + return; + } + rctx->rings.dma.flushing = true; + rctx->ws->cs_flush(rctx->rings.dma.cs, flags); + rctx->rings.dma.flushing = false; +} + +boolean r600_rings_is_buffer_referenced(struct r600_context *ctx, + struct radeon_winsys_cs_handle *buf, + enum radeon_bo_usage usage) +{ + if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, buf, usage)) { + return TRUE; + } + if (ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, buf, usage)) { + return TRUE; + } + return FALSE; +} + +void *r600_buffer_mmap_sync_with_rings(struct r600_context *ctx, + struct r600_resource *resource, + unsigned usage) +{ + enum radeon_bo_usage rusage = RADEON_USAGE_READWRITE; + unsigned flags = 0; + bool sync_flush = TRUE; + + if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) { + return ctx->ws->buffer_map(resource->cs_buf, NULL, usage); + } + + if (!(usage & PIPE_TRANSFER_WRITE)) { + /* have to wait for pending read */ + rusage = RADEON_USAGE_WRITE; + } + if (usage & PIPE_TRANSFER_DONTBLOCK) { + flags |= RADEON_FLUSH_ASYNC; + } + + if (ctx->ws->cs_is_buffer_referenced(ctx->rings.gfx.cs, resource->cs_buf, rusage) && ctx->rings.gfx.cs->cdw) { + ctx->rings.gfx.flush(ctx, flags); + if (usage & PIPE_TRANSFER_DONTBLOCK) { + return NULL; + } + } + if (ctx->ws->cs_is_buffer_referenced(ctx->rings.dma.cs, resource->cs_buf, rusage) && ctx->rings.dma.cs->cdw) { + ctx->rings.dma.flush(ctx, flags); + if (usage & PIPE_TRANSFER_DONTBLOCK) { + return NULL; + } + } + + if (usage & PIPE_TRANSFER_DONTBLOCK) { + if (ctx->ws->buffer_is_busy(resource->buf, rusage)) { + return NULL; + } + } + if (sync_flush) { + /* Try to avoid busy-waiting in radeon_bo_wait. */ + ctx->ws->cs_sync_flush(ctx->rings.gfx.cs); + if (ctx->rings.dma.cs) { + ctx->ws->cs_sync_flush(ctx->rings.dma.cs); + } + } + ctx->ws->buffer_wait(resource->buf, rusage); + + /* at this point everything is synchronized */ + return ctx->ws->buffer_map(resource->cs_buf, NULL, usage | PIPE_TRANSFER_UNSYNCHRONIZED); } static void r600_flush_from_winsys(void *ctx, unsigned flags) { - r600_flush((struct pipe_context*)ctx, NULL, flags); + struct r600_context *rctx = (struct r600_context *)ctx; + + rctx->rings.gfx.flush(rctx, flags); +} + +static void r600_flush_dma_from_winsys(void *ctx, unsigned flags) +{ + struct r600_context *rctx = (struct r600_context *)ctx; + + rctx->rings.dma.flush(rctx, flags); } static void r600_destroy_context(struct pipe_context *context) @@ -197,8 +290,11 @@ static void r600_destroy_context(struct pipe_context *context) r600_release_command_buffer(&rctx->start_cs_cmd); - if (rctx->cs) { - rctx->ws->cs_destroy(rctx->cs); + if (rctx->rings.gfx.cs) { + rctx->ws->cs_destroy(rctx->rings.gfx.cs); + } + if (rctx->rings.dma.cs) { + rctx->ws->cs_destroy(rctx->rings.dma.cs); } FREE(rctx->range); @@ -289,8 +385,18 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void goto fail; } - rctx->cs = rctx->ws->cs_create(rctx->ws, RING_GFX); - rctx->ws->cs_set_flush_callback(rctx->cs, r600_flush_from_winsys, rctx); + rctx->rings.gfx.cs = rctx->ws->cs_create(rctx->ws, RING_GFX); + rctx->rings.gfx.flush = r600_flush_gfx_ring; + rctx->ws->cs_set_flush_callback(rctx->rings.gfx.cs, r600_flush_from_winsys, rctx); + rctx->rings.gfx.flushing = false; + + rctx->rings.dma.cs = NULL; + if (rscreen->info.r600_has_dma) { + rctx->rings.dma.cs = rctx->ws->cs_create(rctx->ws, RING_DMA); + rctx->rings.dma.flush = r600_flush_dma_ring; + rctx->ws->cs_set_flush_callback(rctx->rings.dma.cs, r600_flush_dma_from_winsys, rctx); + rctx->rings.dma.flushing = false; + } rctx->uploader = u_upload_create(&rctx->context, 1024 * 1024, 256, PIPE_BIND_INDEX_BUFFER | diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index d983718b1bb..5cb080579f0 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -406,11 +406,22 @@ struct r600_fetch_shader { unsigned offset; }; +struct r600_ring { + struct radeon_winsys_cs *cs; + bool flushing; + void (*flush)(void *ctx, unsigned flags); +}; + +struct r600_rings { + struct r600_ring gfx; + struct r600_ring dma; +}; + struct r600_context { struct pipe_context context; struct r600_screen *screen; struct radeon_winsys *ws; - struct radeon_winsys_cs *cs; + struct r600_rings rings; struct blitter_context *blitter; struct u_upload_mgr *uploader; struct u_suballocator *allocator_so_filled_size; @@ -626,8 +637,12 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, unsigned alignment); /* r600_pipe.c */ -void r600_flush(struct pipe_context *ctx, struct pipe_fence_handle **fence, - unsigned flags); +boolean r600_rings_is_buffer_referenced(struct r600_context *ctx, + struct radeon_winsys_cs_handle *buf, + enum radeon_bo_usage usage); +void *r600_buffer_mmap_sync_with_rings(struct r600_context *ctx, + struct r600_resource *resource, + unsigned usage); /* r600_query.c */ void r600_init_query_functions(struct r600_context *rctx); @@ -835,12 +850,25 @@ void r600_release_command_buffer(struct r600_command_buffer *cb); /* * Helpers for emitting state into a command stream directly. */ - -static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, struct r600_resource *rbo, +static INLINE unsigned r600_context_bo_reloc(struct r600_context *ctx, + struct r600_ring *ring, + struct r600_resource *rbo, enum radeon_bo_usage usage) { assert(usage); - return ctx->ws->cs_add_reloc(ctx->cs, rbo->cs_buf, usage, rbo->domains) * 4; + /* make sure that all previous ring use are flushed so everything + * look serialized from driver pov + */ + if (!ring->flushing) { + if (ring == &ctx->rings.gfx) { + /* flush dma ring */ + ctx->rings.dma.flush(ctx, RADEON_FLUSH_ASYNC); + } else { + /* flush gfx ring */ + ctx->rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC); + } + } + return ctx->ws->cs_add_reloc(ring->cs, rbo->cs_buf, usage, rbo->domains) * 4; } static INLINE void r600_write_value(struct radeon_winsys_cs *cs, unsigned value) diff --git a/src/gallium/drivers/r600/r600_query.c b/src/gallium/drivers/r600/r600_query.c index 083d510677a..03351892978 100644 --- a/src/gallium/drivers/r600/r600_query.c +++ b/src/gallium/drivers/r600/r600_query.c @@ -52,7 +52,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, uns switch (type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: - results = ctx->ws->buffer_map(buf->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE); + results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE); memset(results, 0, buf_size); /* Set top bits for unused backends. */ @@ -75,7 +75,7 @@ static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, uns case PIPE_QUERY_PRIMITIVES_GENERATED: case PIPE_QUERY_SO_STATISTICS: case PIPE_QUERY_SO_OVERFLOW_PREDICATE: - results = ctx->ws->buffer_map(buf->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE); + results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE); memset(results, 0, buf_size); ctx->ws->buffer_unmap(buf->cs_buf); break; @@ -106,7 +106,7 @@ static void r600_update_occlusion_query_state(struct r600_context *rctx, static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; uint64_t va; r600_update_occlusion_query_state(ctx, query->type, 1); @@ -154,7 +154,7 @@ static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *q assert(0); } cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE); + cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE); if (!r600_is_timer_query(query->type)) { ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw; @@ -163,7 +163,7 @@ static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *q static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; uint64_t va; /* The queries which need begin already called this in begin_query. */ @@ -206,7 +206,7 @@ static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *que assert(0); } cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer.buf, RADEON_USAGE_WRITE); + cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE); query->buffer.results_end += query->result_size; @@ -222,7 +222,7 @@ static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *que static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, bool flag_wait) { - struct radeon_winsys_cs *cs = ctx->cs; + struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; if (operation == PREDICATION_OP_CLEAR) { r600_need_cs_space(ctx, 3, FALSE); @@ -256,7 +256,7 @@ static void r600_emit_query_predication(struct r600_context *ctx, struct r600_qu cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL; cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, qbuf->buf, RADEON_USAGE_READ); + cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ); results_base += query->result_size; /* set CONTINUE bit for all packets except the first */ @@ -351,7 +351,7 @@ static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query) } /* Obtain a new buffer if the current one can't be mapped without a stall. */ - if (rctx->ws->cs_is_buffer_referenced(rctx->cs, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) || + if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) || rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) { pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL); rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type); @@ -406,9 +406,9 @@ static boolean r600_get_query_buffer_result(struct r600_context *ctx, unsigned results_base = 0; char *map; - map = ctx->ws->buffer_map(qbuf->buf->cs_buf, ctx->cs, - PIPE_TRANSFER_READ | - (wait ? 0 : PIPE_TRANSFER_DONTBLOCK)); + map = r600_buffer_mmap_sync_with_rings(ctx, qbuf->buf, + PIPE_TRANSFER_READ | + (wait ? 0 : PIPE_TRANSFER_DONTBLOCK)); if (!map) return FALSE; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 11802f0d2ed..727fac959cd 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -72,7 +72,7 @@ static int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *s if (shader->bo == NULL) { return -ENOMEM; } - ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); + ptr = r600_buffer_mmap_sync_with_rings(rctx, shader->bo, PIPE_TRANSFER_WRITE); if (R600_BIG_ENDIAN) { for (i = 0; i < rshader->bc.ndw; ++i) { ptr[i] = bswap_32(rshader->bc.bytecode[i]); diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index f3f7acb32c2..da249c247bc 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -646,7 +646,7 @@ boolean r600_is_format_supported(struct pipe_screen *screen, static void r600_emit_polygon_offset(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_poly_offset_state *state = (struct r600_poly_offset_state*)a; float offset_units = state->offset_units; float offset_scale = state->offset_scale; @@ -1142,7 +1142,7 @@ r600_create_sampler_view(struct pipe_context *ctx, static void r600_emit_clip_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct pipe_clip_state *state = &rctx->clip_state.state; r600_write_context_reg_seq(cs, R_028E20_PA_CL_UCP0_X, 6*4); @@ -1156,7 +1156,7 @@ static void r600_set_polygon_stipple(struct pipe_context *ctx, static void r600_emit_scissor_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct pipe_scissor_state *state = &rctx->scissor.scissor; if (rctx->chip_class != R600 || rctx->scissor.enable) { @@ -1653,7 +1653,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) }; static unsigned max_dist_8x = 7; - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; unsigned max_dist = 0; if (rctx->family == CHIP_R600) { @@ -1720,7 +1720,7 @@ static void r600_emit_msaa_state(struct r600_context *rctx, int nr_samples) static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct pipe_framebuffer_state *state = &rctx->framebuffer.state; unsigned nr_cbufs = state->nr_cbufs; struct r600_surface **cb = (struct r600_surface**)&state->cbufs[0]; @@ -1750,6 +1750,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a /* relocations */ for (i = 0; i < nr_cbufs; i++) { unsigned reloc = r600_context_bo_reloc(rctx, + &rctx->rings.gfx, (struct r600_resource*)cb[i]->base.texture, RADEON_USAGE_READWRITE); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); @@ -1779,6 +1780,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a /* relocations */ for (i = 0; i < nr_cbufs; i++) { unsigned reloc = r600_context_bo_reloc(rctx, + &rctx->rings.gfx, cb[i]->cb_buffer_fmask, RADEON_USAGE_READWRITE); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); @@ -1793,6 +1795,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a /* relocations */ for (i = 0; i < nr_cbufs; i++) { unsigned reloc = r600_context_bo_reloc(rctx, + &rctx->rings.gfx, cb[i]->cb_buffer_cmask, RADEON_USAGE_READWRITE); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); @@ -1813,6 +1816,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a if (state->zsbuf) { struct r600_surface *surf = (struct r600_surface*)state->zsbuf; unsigned reloc = r600_context_bo_reloc(rctx, + &rctx->rings.gfx, (struct r600_resource*)state->zsbuf->texture, RADEON_USAGE_READWRITE); @@ -1867,7 +1871,7 @@ static void r600_emit_framebuffer_state(struct r600_context *rctx, struct r600_a static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom; if (G_028808_SPECIAL_OP(a->cb_color_control) == V_028808_SPECIAL_RESOLVE_BOX) { @@ -1897,7 +1901,7 @@ static void r600_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_db_state *a = (struct r600_db_state*)atom; if (a->rsurf && a->rsurf->htile_enabled) { @@ -1907,7 +1911,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom r600_write_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear)); r600_write_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface); r600_write_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base); - reloc_idx = r600_context_bo_reloc(rctx, rtex->htile, RADEON_USAGE_READWRITE); + reloc_idx = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rtex->htile, RADEON_USAGE_READWRITE); cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); cs->buf[cs->cdw++] = reloc_idx; } else { @@ -1917,7 +1921,7 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_db_misc_state *a = (struct r600_db_misc_state*)atom; unsigned db_render_control = 0; unsigned db_render_override = @@ -1960,7 +1964,7 @@ static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_config_state *a = (struct r600_config_state*)atom; r600_write_config_reg(cs, R_008C04_SQ_GPR_RESOURCE_MGMT_1, a->sq_gpr_resource_mgmt_1); @@ -1968,7 +1972,7 @@ static void r600_emit_config_state(struct r600_context *rctx, struct r600_atom * static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint32_t dirty_mask = rctx->vertex_buffer_state.dirty_mask; while (dirty_mask) { @@ -1997,7 +2001,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 */ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); - r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ)); + r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ)); } } @@ -2007,7 +2011,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, unsigned reg_alu_constbuf_size, unsigned reg_alu_const_cache) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -2027,7 +2031,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, r600_write_context_reg(cs, reg_alu_const_cache + buffer_index * 4, offset >> 8); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); - r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ)); + r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ)); r600_write_value(cs, PKT3(PKT3_SET_RESOURCE, 7, 0)); r600_write_value(cs, (buffer_id_base + buffer_index) * 7); @@ -2042,7 +2046,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx, r600_write_value(cs, 0xc0000000); /* RESOURCEi_WORD6 */ r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); - r600_write_value(cs, r600_context_bo_reloc(rctx, rbuffer, RADEON_USAGE_READ)); + r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, rbuffer, RADEON_USAGE_READ)); dirty_mask &= ~(1 << buffer_index); } @@ -2074,7 +2078,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx, struct r600_samplerview_state *state, unsigned resource_id_base) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint32_t dirty_mask = state->dirty_mask; while (dirty_mask) { @@ -2089,7 +2093,7 @@ static void r600_emit_sampler_views(struct r600_context *rctx, r600_write_value(cs, (resource_id_base + resource_index) * 7); r600_write_array(cs, 7, rview->tex_resource_words); - reloc = r600_context_bo_reloc(rctx, rview->tex_resource, + reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rview->tex_resource, RADEON_USAGE_READ); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); r600_write_value(cs, reloc); @@ -2126,7 +2130,7 @@ static void r600_emit_sampler_states(struct r600_context *rctx, unsigned resource_id_base, unsigned border_color_reg) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint32_t dirty_mask = texinfo->states.dirty_mask; while (dirty_mask) { @@ -2187,7 +2191,7 @@ static void r600_emit_ps_sampler_states(struct r600_context *rctx, struct r600_a static void r600_emit_seamless_cube_map(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; unsigned tmp; tmp = S_009508_DISABLE_CUBE_ANISO(1) | @@ -2205,19 +2209,19 @@ static void r600_emit_sample_mask(struct r600_context *rctx, struct r600_atom *a struct r600_sample_mask *s = (struct r600_sample_mask*)a; uint8_t mask = s->sample_mask; - r600_write_context_reg(rctx->cs, R_028C48_PA_SC_AA_MASK, + r600_write_context_reg(rctx->rings.gfx.cs, R_028C48_PA_SC_AA_MASK, mask | (mask << 8) | (mask << 16) | (mask << 24)); } static void r600_emit_vertex_fetch_shader(struct r600_context *rctx, struct r600_atom *a) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_cso_state *state = (struct r600_cso_state*)a; struct r600_fetch_shader *shader = (struct r600_fetch_shader*)state->cso; r600_write_context_reg(cs, R_028894_SQ_PGM_START_FS, shader->offset >> 8); r600_write_value(cs, PKT3(PKT3_NOP, 0, 0)); - r600_write_value(cs, r600_context_bo_reloc(rctx, shader->buffer, RADEON_USAGE_READ)); + r600_write_value(cs, r600_context_bo_reloc(rctx, &rctx->rings.gfx, shader->buffer, RADEON_USAGE_READ)); } void r600_init_state_functions(struct r600_context *rctx) diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 34977373d21..c7f672f748c 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -65,12 +65,12 @@ void r600_init_atom(struct r600_context *rctx, void r600_emit_cso_state(struct r600_context *rctx, struct r600_atom *atom) { - r600_emit_command_buffer(rctx->cs, ((struct r600_cso_state*)atom)->cb); + r600_emit_command_buffer(rctx->rings.gfx.cs, ((struct r600_cso_state*)atom)->cb); } void r600_emit_alphatest_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_alphatest_state *a = (struct r600_alphatest_state*)atom; unsigned alpha_ref = a->sx_alpha_ref; @@ -176,7 +176,7 @@ static void r600_set_blend_color(struct pipe_context *ctx, void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct pipe_blend_color *state = &rctx->blend_color.state; r600_write_context_reg_seq(cs, R_028414_CB_BLEND_RED, 4); @@ -188,7 +188,7 @@ void r600_emit_blend_color(struct r600_context *rctx, struct r600_atom *atom) void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_vgt_state *a = (struct r600_vgt_state *)atom; r600_write_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, a->vgt_multi_prim_ib_reset_en); @@ -197,7 +197,7 @@ void r600_emit_vgt_state(struct r600_context *rctx, struct r600_atom *atom) void r600_emit_vgt2_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_vgt2_state *a = (struct r600_vgt2_state *)atom; r600_write_context_reg(cs, R_028408_VGT_INDX_OFFSET, a->vgt_indx_offset); @@ -231,7 +231,7 @@ static void r600_set_stencil_ref(struct pipe_context *ctx, void r600_emit_stencil_ref(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_stencil_ref_state *a = (struct r600_stencil_ref_state*)atom; r600_write_context_reg_seq(cs, R_028430_DB_STENCILREFMASK, 2); @@ -658,7 +658,7 @@ static void r600_set_viewport_state(struct pipe_context *ctx, void r600_emit_viewport_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct pipe_viewport_state *state = &rctx->viewport.state; r600_write_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE_0, 6); @@ -1243,7 +1243,7 @@ static unsigned r600_conv_prim_to_gs_out(unsigned mode) void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom) { - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; struct r600_clip_misc_state *state = &rctx->clip_misc_state; r600_write_context_reg(cs, R_028810_PA_CL_CLIP_CNTL, @@ -1261,7 +1261,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info struct pipe_index_buffer ib = {}; unsigned i; struct r600_block *dirty_block = NULL, *next_block = NULL; - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; if (!info.count && (info.indexed || !info.count_from_stream_output)) { assert(0); @@ -1280,6 +1280,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info return; } + /* make sure that the gfx ring is only one active */ + rctx->rings.dma.flush(rctx, RADEON_FLUSH_ASYNC); + if (info.indexed) { /* Initialize the index buffer struct. */ pipe_resource_reference(&ib.buffer, rctx->index_buffer.buffer); @@ -1405,7 +1408,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = info.count; cs->buf[cs->cdw++] = V_0287F0_DI_SRC_SEL_DMA; cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, rctx->predicate_drawing); - cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ); + cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, &rctx->rings.gfx, (struct r600_resource*)ib.buffer, RADEON_USAGE_READ); } } else { if (info.count_from_stream_output) { @@ -1422,7 +1425,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info cs->buf[cs->cdw++] = 0; /* unused */ cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); - cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->buf_filled_size, RADEON_USAGE_READ); + cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, &rctx->rings.gfx, t->buf_filled_size, RADEON_USAGE_READ); } cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->predicate_drawing); @@ -1734,12 +1737,12 @@ void r600_init_common_state_functions(struct r600_context *rctx) void r600_trace_emit(struct r600_context *rctx) { struct r600_screen *rscreen = rctx->screen; - struct radeon_winsys_cs *cs = rctx->cs; + struct radeon_winsys_cs *cs = rctx->rings.gfx.cs; uint64_t va; uint32_t reloc; va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo); - reloc = r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE); + reloc = r600_context_bo_reloc(rctx, &rctx->rings.gfx, rscreen->trace_bo, RADEON_USAGE_READWRITE); r600_write_value(cs, PKT3(PKT3_MEM_WRITE, 3, 0)); r600_write_value(cs, va & 0xFFFFFFFFUL); r600_write_value(cs, (va >> 32UL) & 0xFFUL); diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c index 340fa112f94..b0c55145f1f 100644 --- a/src/gallium/drivers/r600/r600_texture.c +++ b/src/gallium/drivers/r600/r600_texture.c @@ -724,7 +724,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, struct r600_transfer *trans; boolean use_staging_texture = FALSE; enum pipe_format format = texture->format; - struct radeon_winsys_cs_handle *buf; + struct r600_resource *buf; unsigned offset = 0; char *map; @@ -745,7 +745,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, /* Use a staging texture for uploads if the underlying BO is busy. */ if (!(usage & PIPE_TRANSFER_READ) && - (rctx->ws->cs_is_buffer_referenced(rctx->cs, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) || + (r600_rings_is_buffer_referenced(rctx, rtex->resource.cs_buf, RADEON_USAGE_READWRITE) || rctx->ws->buffer_is_busy(rtex->resource.buf, RADEON_USAGE_READWRITE))) { use_staging_texture = TRUE; } @@ -838,8 +838,9 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, trans->transfer.layer_stride = staging->surface.level[0].slice_size; if (usage & PIPE_TRANSFER_READ) { r600_copy_to_staging_texture(ctx, trans); - /* Always referenced in the blit. */ - r600_flush(ctx, NULL, 0); + /* flush gfx & dma ring, order does not matter as only one can be live */ + rctx->rings.dma.flush(rctx, 0); + rctx->rings.gfx.flush(rctx, 0); } } else { trans->transfer.stride = rtex->surface.level[level].pitch_bytes; @@ -848,9 +849,9 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, } if (trans->staging) { - buf = trans->staging->cs_buf; + buf = trans->staging; } else { - buf = rtex->resource.cs_buf; + buf = &rtex->resource; } if (rtex->is_depth || !trans->staging) @@ -858,7 +859,7 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, box->y / util_format_get_blockheight(format) * trans->transfer.stride + box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format); - if (!(map = rctx->ws->buffer_map(buf, rctx->cs, usage))) { + if (!(map = r600_buffer_mmap_sync_with_rings(rctx, buf, usage))) { pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL); FREE(trans); return NULL; |