diff options
-rw-r--r-- | src/gallium/drivers/r300/r300_cs.h | 16 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_emit.c | 129 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_flush.c | 4 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_render.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/r300/r300_winsys.h | 41 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/Makefile | 1 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/SConscript | 1 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_buffer.c | 38 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_common.c | 8 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 360 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 42 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_r300.c | 99 | ||||
-rw-r--r-- | src/gallium/winsys/radeon/drm/radeon_winsys.h | 23 |
13 files changed, 505 insertions, 260 deletions
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 67fb0096a8c..6726f100e1b 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -106,26 +106,26 @@ * Writing relocations. */ -#define OUT_CS_RELOC(bo, offset, rd, wd) do { \ +#define OUT_CS_RELOC(bo, offset) do { \ assert(bo); \ OUT_CS(offset); \ - cs_winsys->cs_write_reloc(cs_copy, bo, rd, wd); \ + cs_winsys->cs_write_reloc(cs_copy, bo); \ CS_DEBUG(cs_count -= 2;) \ } while (0) -#define OUT_CS_BUF_RELOC(bo, offset, rd, wd) do { \ +#define OUT_CS_BUF_RELOC(bo, offset) do { \ assert(bo); \ - OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset, rd, wd); \ + OUT_CS_RELOC(r300_buffer(bo)->cs_buf, offset); \ } while (0) -#define OUT_CS_TEX_RELOC(tex, offset, rd, wd) do { \ +#define OUT_CS_TEX_RELOC(tex, offset) do { \ assert(tex); \ - OUT_CS_RELOC(tex->cs_buffer, offset, rd, wd); \ + OUT_CS_RELOC(tex->cs_buffer, offset); \ } while (0) -#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd) do { \ +#define OUT_CS_BUF_RELOC_NO_OFFSET(bo) do { \ assert(bo); \ - cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf, rd, wd); \ + cs_winsys->cs_write_reloc(cs_copy, r300_buffer(bo)->cs_buf); \ CS_DEBUG(cs_count -= 2;) \ } while (0) diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 41a12708cee..d14cdcbbaf0 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -353,10 +353,10 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) if (aa->dest) { OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset, 0, aa->dest->domain); + OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->offset); OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); - OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch, 0, aa->dest->domain); + OUT_CS_RELOC(aa->dest->cs_buffer, aa->dest->pitch); } OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); @@ -392,10 +392,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) surf = r300_surface(fb->cbufs[i]); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->offset); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->pitch); } /* Set up the ZB part of the CBZB clear. */ @@ -405,10 +405,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_midpoint_offset); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->cbzb_pitch); DBG(r300, DBG_CBZB, "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, @@ -421,10 +421,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_FORMAT, surf->format); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->offset, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->offset); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->cs_buffer, surf->pitch, 0, surf->domain); + OUT_CS_RELOC(surf->cs_buffer, surf->pitch); if (can_hyperz) { uint32_t surf_pitch; @@ -589,28 +589,24 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 3) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 3) * 4); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 2) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 2) * 4); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, - 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -632,7 +628,7 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain); + OUT_CS_RELOC(buf, query->num_results * 4); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -646,10 +642,10 @@ static void rv530_emit_query_end_double_z(struct r300_context *r300, BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -810,8 +806,7 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2); OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); - OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain, - 0); + OUT_CS_TEX_RELOC(tex, texstate->format.tile_config); } } END_CS; @@ -899,7 +894,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean inde for (i = 0; i < vertex_array_count; i++) { buf = r300_buffer(valid_vbuf[velem[i].vertex_buffer_index]); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0); + OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b); } END_CS; } @@ -924,7 +919,7 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->draw_vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0); + OUT_CS_BUF_RELOC(r300->vbo, 0); END_CS; } @@ -1220,65 +1215,77 @@ boolean r300_emit_buffer_validate(struct r300_context *r300, boolean do_validate_vertex_buffers, struct pipe_resource *index_buffer) { - struct pipe_framebuffer_state* fb = + struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; struct r300_textures_state *texstate = (struct r300_textures_state*)r300->textures_state.state; - struct r300_texture* tex; - struct pipe_resource **vbuf = r300->valid_vertex_buffer; + struct r300_texture *tex; unsigned i; - - /* Clean out BOs. */ - r300->rws->cs_reset_buffers(r300->cs); - - /* Color buffers... */ - for (i = 0; i < fb->nr_cbufs; i++) { - tex = r300_texture(fb->cbufs[i]->texture); - assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, 0, - r300_surface(fb->cbufs[i])->domain); - } - /* ...depth buffer... */ - if (fb->zsbuf) { - tex = r300_texture(fb->zsbuf->texture); - assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, 0, - r300_surface(fb->zsbuf)->domain); - } - /* ...textures... */ - for (i = 0; i < texstate->count; i++) { - if (!(texstate->tx_enable & (1 << i))) { - continue; + boolean flushed = FALSE; + +validate: + if (r300->fb_state.dirty) { + /* Color buffers... */ + for (i = 0; i < fb->nr_cbufs; i++) { + tex = r300_texture(fb->cbufs[i]->texture); + assert(tex && tex->buffer && "cbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0, + r300_surface(fb->cbufs[i])->domain); } + /* ...depth buffer... */ + if (fb->zsbuf) { + tex = r300_texture(fb->zsbuf->texture); + assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, 0, + r300_surface(fb->zsbuf)->domain); + } + } + if (r300->textures_state.dirty) { + /* ...textures... */ + for (i = 0; i < texstate->count; i++) { + if (!(texstate->tx_enable & (1 << i))) { + continue; + } - tex = r300_texture(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_buffer(r300->cs, tex->cs_buffer, tex->domain, 0); + tex = r300_texture(texstate->sampler_views[i]->base.texture); + r300->rws->cs_add_reloc(r300->cs, tex->cs_buffer, tex->domain, 0); + } } /* ...occlusion query buffer... */ if (r300->query_current) - r300->rws->cs_add_buffer(r300->cs, r300->query_current->cs_buffer, - 0, r300->query_current->domain); + r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buffer, + 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) - r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->cs_buf, - r300_buffer(r300->vbo)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_buffer(r300->vbo)->cs_buf, + r300_buffer(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers) { - for (i = 0; i < r300->vertex_buffer_count; i++) { - if (!vbuf[i]) + struct pipe_resource **buf = r300->valid_vertex_buffer; + struct pipe_resource **last = r300->valid_vertex_buffer + + r300->vertex_buffer_count; + for (; buf != last; buf++) { + if (!*buf) continue; - r300->rws->cs_add_buffer(r300->cs, r300_buffer(vbuf[i])->cs_buf, - r300_buffer(vbuf[i])->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_buffer(*buf)->cs_buf, + r300_buffer(*buf)->domain, 0); } } /* ...and index buffer for HWTCL path. */ if (index_buffer) - r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->cs_buf, - r300_buffer(index_buffer)->domain, 0); + r300->rws->cs_add_reloc(r300->cs, r300_buffer(index_buffer)->cs_buf, + r300_buffer(index_buffer)->domain, 0); + /* Now do the validation. */ if (!r300->rws->cs_validate(r300->cs)) { - return FALSE; + /* Ooops, an infinite loop, give up. */ + if (flushed) + return FALSE; + + r300->context.flush(&r300->context, 0, NULL); + flushed = TRUE; + goto validate; } return TRUE; diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c index 7a26a86a912..b250532ba92 100644 --- a/src/gallium/drivers/r300/r300_flush.c +++ b/src/gallium/drivers/r300/r300_flush.c @@ -72,6 +72,10 @@ static void r300_flush(struct pipe_context* pipe, r300->validate_buffers = TRUE; r300->upload_vb_validated = FALSE; r300->upload_ib_validated = FALSE; + } else { + /* Even if hw is not dirty, we should at least reset the CS in case + * the space checking failed for the first draw operation. */ + r300->rws->cs_flush(r300->cs); } /* reset flushed query */ diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index dfa3045d649..e660ca68f1b 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -537,8 +537,7 @@ static void r300_emit_draw_elements(struct r300_context *r300, OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) | (0 << R300_INDX_BUFFER_SKIP_SHIFT)); OUT_CS(offset_dwords << 2); - OUT_CS_BUF_RELOC(indexBuffer, count_dwords, - r300_buffer(indexBuffer)->domain, 0); + OUT_CS_BUF_RELOC(indexBuffer, count_dwords); END_CS; } diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h index b8324afe511..460da77a4fb 100644 --- a/src/gallium/drivers/r300/r300_winsys.h +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -42,8 +42,8 @@ struct r300_winsys_buffer; /* for map/unmap etc. */ struct r300_winsys_cs_buffer; /* for write_reloc etc. */ struct r300_winsys_cs { - unsigned cdw; /* Number of used dwords. */ - uint32_t *buf; /* The command buffer. */ + unsigned cdw; /* Number of used dwords. */ + uint32_t buf[R300_MAX_CMDBUF_DWORDS]; /* The command buffer. */ }; enum r300_value_id { @@ -239,23 +239,22 @@ struct r300_winsys_screen { void (*cs_destroy)(struct r300_winsys_cs *cs); /** - * Add a buffer object to the list of buffers to validate. + * Add a new buffer relocation. Every relocation must first be added + * before it can be written. * - * \param cs A command stream to add buffer for validation against. - * \param buf A winsys buffer to validate. - * \param rd A read domain containing a bitmask - * of the R300_DOMAIN_* flags. - * \param wd A write domain containing a bitmask - * of the R300_DOMAIN_* flags. + * \param cs A command stream to add buffer for validation against. + * \param buf A winsys buffer to validate. + * \param rd A read domain containing a bitmask of the R300_DOMAIN_* flags. + * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ - void (*cs_add_buffer)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + void (*cs_add_reloc)(struct r300_winsys_cs *cs, + struct r300_winsys_cs_buffer *buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd); /** - * Revalidate all currently set up winsys buffers. - * Returns TRUE if a flush is required. + * Return TRUE if there is enough memory in VRAM and GTT for the relocs + * added so far. * * \param cs A command stream to validate. */ @@ -270,9 +269,7 @@ struct r300_winsys_screen { * \param wd A write domain containing a bitmask of the R300_DOMAIN_* flags. */ void (*cs_write_reloc)(struct r300_winsys_cs *cs, - struct r300_winsys_cs_buffer *buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd); + struct r300_winsys_cs_buffer *buf); /** * Flush a command stream. @@ -294,14 +291,6 @@ struct r300_winsys_screen { void *user); /** - * Reset the list of buffer objects to validate, usually called - * prior to adding buffer objects for validation. - * - * \param cs A command stream to reset buffers for. - */ - void (*cs_reset_buffers)(struct r300_winsys_cs *cs); - - /** * Return TRUE if a buffer is referenced by a command stream or by hardware * (i.e. is busy), based on the domain parameter. * diff --git a/src/gallium/winsys/radeon/drm/Makefile b/src/gallium/winsys/radeon/drm/Makefile index aa73edde34e..7e339a2ecfe 100644 --- a/src/gallium/winsys/radeon/drm/Makefile +++ b/src/gallium/winsys/radeon/drm/Makefile @@ -6,6 +6,7 @@ LIBNAME = radeonwinsys C_SOURCES = \ radeon_drm_buffer.c \ + radeon_drm_cs.c \ radeon_drm_common.c \ radeon_r300.c diff --git a/src/gallium/winsys/radeon/drm/SConscript b/src/gallium/winsys/radeon/drm/SConscript index 2dbf61a7ba3..80816621848 100644 --- a/src/gallium/winsys/radeon/drm/SConscript +++ b/src/gallium/winsys/radeon/drm/SConscript @@ -4,6 +4,7 @@ env = env.Clone() radeon_sources = [ 'radeon_drm_buffer.c', + 'radeon_drm_cs.c', 'radeon_drm_common.c', 'radeon_r300.c', ] diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c index dc4b51747f4..5e14287ec2d 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_buffer.c @@ -1,5 +1,5 @@ -#include "radeon_cs_gem.h" #include "radeon_drm_buffer.h" +#include "radeon_drm_cs.h" #include "util/u_hash_table.h" #include "util/u_memory.h" @@ -155,7 +155,7 @@ radeon_drm_buffer_map_internal(struct pb_buffer *_buf, * we cannot flush. */ assert(cs || !radeon_bo_is_referenced_by_cs(buf->bo, NULL)); - if (cs && radeon_bo_is_referenced_by_cs(buf->bo, cs->cs)) { + if (cs && radeon_bo_is_referenced_by_cs(buf->bo, NULL)) { cs->flush_cs(cs->flush_data); } @@ -463,35 +463,6 @@ static void radeon_drm_buffer_set_tiling(struct r300_winsys_screen *ws, radeon_bo_set_tiling(buf->bo, flags, pitch); } -static void radeon_drm_bufmgr_add_buffer(struct r300_winsys_cs *rcs, - struct r300_winsys_cs_buffer *_buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd) -{ - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - struct radeon_bo *bo = (struct radeon_bo*)_buf; - - radeon_cs_space_add_persistent_bo(cs->cs, bo, rd, wd); -} - -static void radeon_drm_bufmgr_write_reloc(struct r300_winsys_cs *rcs, - struct r300_winsys_cs_buffer *_buf, - enum r300_buffer_domain rd, - enum r300_buffer_domain wd) -{ - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - struct radeon_bo *bo = (struct radeon_bo*)_buf; - int retval; - - cs->cs->cdw = cs->base.cdw; - retval = radeon_cs_write_reloc(cs->cs, bo, rd, wd, 0); - cs->base.cdw = cs->cs->cdw; - if (retval) { - fprintf(stderr, "radeon: Relocation of %p (%d, %d, %d) failed!\n", - bo, rd, wd, 0); - } -} - static struct r300_winsys_cs_buffer *radeon_drm_get_cs_handle( struct r300_winsys_screen *rws, struct r300_winsys_buffer *_buf) @@ -505,12 +476,11 @@ static boolean radeon_drm_is_buffer_referenced(struct r300_winsys_cs *rcs, struct r300_winsys_cs_buffer *_buf, enum r300_reference_domain domain) { - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); struct radeon_bo *bo = (struct radeon_bo*)_buf; uint32_t tmp; if (domain & R300_REF_CS) { - if (radeon_bo_is_referenced_by_cs(bo, cs->cs)) { + if (radeon_bo_is_referenced_by_cs(bo, NULL)) { return TRUE; } } @@ -559,6 +529,4 @@ void radeon_drm_bufmgr_init_functions(struct radeon_drm_winsys *ws) ws->base.buffer_unmap = radeon_drm_buffer_unmap; ws->base.buffer_wait = radeon_drm_buffer_wait; ws->base.cs_is_buffer_referenced = radeon_drm_is_buffer_referenced; - ws->base.cs_add_buffer = radeon_drm_bufmgr_add_buffer; - ws->base.cs_write_reloc = radeon_drm_bufmgr_write_reloc; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_common.c b/src/gallium/winsys/radeon/drm/radeon_drm_common.c index edd7cd3b7b8..fe71f080592 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_common.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_common.c @@ -31,6 +31,7 @@ #include "radeon_winsys.h" #include "radeon_drm_buffer.h" +#include "radeon_drm_cs.h" #include "radeon_drm_public.h" #include "pipebuffer/pb_bufmgr.h" @@ -185,7 +186,6 @@ static void radeon_winsys_destroy(struct r300_winsys_screen *rws) ws->kman->destroy(ws->kman); radeon_bo_manager_gem_dtor(ws->bom); - radeon_cs_manager_gem_dtor(ws->csm); FREE(rws); } @@ -207,9 +207,6 @@ struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd) ws->bom = radeon_bo_manager_gem_ctor(fd); if (!ws->bom) goto fail; - ws->csm = radeon_cs_manager_gem_ctor(fd); - if (!ws->csm) - goto fail; ws->kman = radeon_drm_bufmgr_create(ws); if (!ws->kman) goto fail; @@ -221,6 +218,7 @@ struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd) ws->base.destroy = radeon_winsys_destroy; radeon_drm_bufmgr_init_functions(ws); + radeon_drm_cs_init_functions(ws); radeon_winsys_init_functions(ws); return &ws->base; @@ -228,8 +226,6 @@ struct r300_winsys_screen *r300_drm_winsys_screen_create(int fd) fail: if (ws->bom) radeon_bo_manager_gem_dtor(ws->bom); - if (ws->csm) - radeon_cs_manager_gem_dtor(ws->csm); if (ws->cman) ws->cman->destroy(ws->cman); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c new file mode 100644 index 00000000000..60bc36b0929 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -0,0 +1,360 @@ +/* + * Copyright © 2008 Jérôme Glisse + * Copyright © 2010 Marek Olšák <[email protected]> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Marek Olšák <[email protected]> + * + * Based on work from libdrm_radeon by: + * Aapo Tahkola <[email protected]> + * Nicolai Haehnle <[email protected]> + * Jérôme Glisse <[email protected]> + */ + +/* + This file replaces libdrm's radeon_cs_gem with our own implemention. + It's optimized specifically for r300g, but r600g could use it as well. + Reloc writes and space checking are faster and simpler than their + counterparts in libdrm (the time complexity of all the functions + is O(1) in nearly all scenarios, thanks to hashing). + + It works like this: + + cs_add_reloc(cs, buf, read_domain, write_domain) adds a new relocation and + also adds the size of 'buf' to the used_gart and used_vram winsys variables + based on the domains, which are simply or'd for the accounting purposes. + The adding is skipped if the reloc is already present in the list, but it + accounts any newly-referenced domains. + + cs_validate is then called, which just checks: + used_vram/gart < vram/gart_size * 0.8 + The 0.8 number allows for some memory fragmentation. If the validation + fails, the pipe driver flushes CS and tries do the validation again, + i.e. it validates only that one operation. If it fails again, it drops + the operation on the floor and prints some nasty message to stderr. + (done in the pipe driver) + + cs_write_reloc(cs, buf) just writes a reloc that has been added using + cs_add_reloc. The read_domain and write_domain parameters have been removed, + because we already specify them in cs_add_reloc. +*/ + +#include "radeon_drm_cs.h" +#include "radeon_drm_buffer.h" + +#include "util/u_memory.h" + +#include <stdlib.h> +#include <stdint.h> +#include <radeon_bo.h> +#include <xf86drm.h> + +#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) + +static struct r300_winsys_cs *radeon_drm_cs_create(struct r300_winsys_screen *rws) +{ + struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); + struct radeon_drm_cs *cs; + + cs = CALLOC_STRUCT(radeon_drm_cs); + if (!cs) { + return NULL; + } + + cs->ws = ws; + cs->nrelocs = 256; + cs->relocs_bo = (struct radeon_bo**) + CALLOC(1, cs->nrelocs * sizeof(struct radeon_bo*)); + if (!cs->relocs_bo) { + FREE(cs); + return NULL; + } + + cs->relocs = (struct drm_radeon_cs_reloc*) + CALLOC(1, cs->nrelocs * sizeof(struct drm_radeon_cs_reloc)); + if (!cs->relocs) { + FREE(cs->relocs_bo); + FREE(cs); + return NULL; + } + + cs->chunks[0].chunk_id = RADEON_CHUNK_ID_IB; + cs->chunks[0].length_dw = 0; + cs->chunks[0].chunk_data = (uint64_t)(uintptr_t)cs->base.buf; + cs->chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; + cs->chunks[1].length_dw = 0; + cs->chunks[1].chunk_data = (uint64_t)(uintptr_t)cs->relocs; + return &cs->base; +} + +#define OUT_CS(cs, value) (cs)->buf[(cs)->cdw++] = (value) + +static inline void update_domains(struct drm_radeon_cs_reloc *reloc, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd, + enum r300_buffer_domain *added_domains) +{ + *added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain); + + if (reloc->read_domains & wd) { + reloc->read_domains = rd; + reloc->write_domain = wd; + } else if (rd & reloc->write_domain) { + reloc->read_domains = rd; + reloc->write_domain |= wd; + } else { + reloc->read_domains |= rd; + reloc->write_domain |= wd; + } +} + +static int radeon_get_reloc(struct radeon_drm_cs *cs, + struct radeon_bo *bo) +{ + struct drm_radeon_cs_reloc *reloc; + unsigned i; + unsigned hash = bo->handle & (sizeof(cs->is_handle_added)-1); + + if (cs->is_handle_added[hash]) { + reloc = cs->relocs_hashlist[hash]; + if (reloc->handle == bo->handle) { + return cs->reloc_indices_hashlist[hash]; + } + + /* Hash collision, look for the BO in the list of relocs linearly. */ + for (i = cs->crelocs; i != 0;) { + --i; + reloc = &cs->relocs[i]; + if (reloc->handle == bo->handle) { + /* Put this reloc in the hash list. + * This will prevent additional hash collisions if there are + * several subsequent get_reloc calls of the same buffer. + * + * Example: Assuming buffers A,B,C collide in the hash list, + * the following sequence of relocs: + * AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC + * will collide here: ^ and here: ^, + * meaning that we should get very few collisions in the end. */ + cs->relocs_hashlist[hash] = reloc; + cs->reloc_indices_hashlist[hash] = i; + /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ + return i; + } + } + } + + return -1; +} + +static void radeon_add_reloc(struct radeon_drm_cs *cs, + struct radeon_bo *bo, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd, + enum r300_buffer_domain *added_domains) +{ + struct drm_radeon_cs_reloc *reloc; + unsigned i; + unsigned hash = bo->handle & (sizeof(cs->is_handle_added)-1); + + if (cs->is_handle_added[hash]) { + reloc = cs->relocs_hashlist[hash]; + if (reloc->handle == bo->handle) { + update_domains(reloc, rd, wd, added_domains); + return; + } + + /* Hash collision, look for the BO in the list of relocs linearly. */ + for (i = cs->crelocs; i != 0;) { + --i; + reloc = &cs->relocs[i]; + if (reloc->handle == bo->handle) { + update_domains(reloc, rd, wd, added_domains); + + cs->relocs_hashlist[hash] = reloc; + cs->reloc_indices_hashlist[hash] = i; + /*printf("write_reloc collision, hash: %i, handle: %i\n", hash, bo->handle);*/ + return; + } + } + } + + /* New relocation, check if the backing array is large enough. */ + if (cs->crelocs >= cs->nrelocs) { + uint32_t size; + cs->nrelocs += 10; + + size = cs->nrelocs * sizeof(struct radeon_bo*); + cs->relocs_bo = (struct radeon_bo**)realloc(cs->relocs_bo, size); + + size = cs->nrelocs * sizeof(struct drm_radeon_cs_reloc); + cs->relocs = (struct drm_radeon_cs_reloc*)realloc(cs->relocs, size); + + cs->chunks[1].chunk_data = (uint64_t)(uintptr_t)cs->relocs; + } + + /* Initialize the new relocation. */ + radeon_bo_ref(bo); + cs->relocs_bo[cs->crelocs] = bo; + reloc = &cs->relocs[cs->crelocs]; + reloc->handle = bo->handle; + reloc->read_domains = rd; + reloc->write_domain = wd; + reloc->flags = 0; + + cs->is_handle_added[hash] = TRUE; + cs->relocs_hashlist[hash] = reloc; + cs->reloc_indices_hashlist[hash] = cs->crelocs; + + cs->chunks[1].length_dw += RELOC_DWORDS; + cs->crelocs++; + + *added_domains = rd | wd; +} + +static void radeon_drm_cs_add_reloc(struct r300_winsys_cs *rcs, + struct r300_winsys_cs_buffer *buf, + enum r300_buffer_domain rd, + enum r300_buffer_domain wd) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_bo *bo = (struct radeon_bo*)buf; + enum r300_buffer_domain added_domains; + + radeon_add_reloc(cs, bo, rd, wd, &added_domains); + + if (!added_domains) + return; + + if (added_domains & R300_DOMAIN_GTT) + cs->used_gart += bo->size; + if (added_domains & R300_DOMAIN_VRAM) + cs->used_vram += bo->size; +} + +static boolean radeon_drm_cs_validate(struct r300_winsys_cs *rcs) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + + return cs->used_gart < cs->ws->gart_size * 0.8 && + cs->used_vram < cs->ws->vram_size * 0.8; +} + +static void radeon_drm_cs_write_reloc(struct r300_winsys_cs *rcs, + struct r300_winsys_cs_buffer *buf) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + struct radeon_bo *bo = (struct radeon_bo*)buf; + + unsigned index = radeon_get_reloc(cs, bo); + + if (index == -1) { + fprintf(stderr, "r300: Cannot get a relocation in %s.\n", __func__); + return; + } + + OUT_CS(&cs->base, 0xc0001000); + OUT_CS(&cs->base, index * RELOC_DWORDS); +} + +static void radeon_drm_cs_emit(struct r300_winsys_cs *rcs) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + uint64_t chunk_array[2]; + unsigned i; + int r; + + if (cs->base.cdw) { + /* Unmap buffers. */ + radeon_drm_bufmgr_flush_maps(cs->ws->kman); + + /* Prepare the arguments. */ + cs->chunks[0].length_dw = cs->base.cdw; + + chunk_array[0] = (uint64_t)(uintptr_t)&cs->chunks[0]; + chunk_array[1] = (uint64_t)(uintptr_t)&cs->chunks[1]; + + cs->cs.num_chunks = 2; + cs->cs.chunks = (uint64_t)(uintptr_t)chunk_array; + + /* Emit. */ + r = drmCommandWriteRead(cs->ws->fd, DRM_RADEON_CS, + &cs->cs, sizeof(struct drm_radeon_cs)); + if (r) { + if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { + fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); + fprintf(stderr, "VENDORID:DEVICEID 0x%04X:0x%04X\n", 0x1002, + cs->ws->pci_id); + for (i = 0; i < cs->base.cdw; i++) { + fprintf(stderr, "0x%08X\n", cs->base.buf[i]); + } + } else { + fprintf(stderr, "radeon: The kernel rejected CS, " + "see dmesg for more information.\n"); + } + } + } + + /* Unreference buffers, cleanup. */ + for (i = 0; i < cs->crelocs; i++) { + radeon_bo_unref((struct radeon_bo*)cs->relocs_bo[i]); + cs->relocs_bo[i] = NULL; + } + + cs->base.cdw = 0; + cs->crelocs = 0; + cs->chunks[0].length_dw = 0; + cs->chunks[1].length_dw = 0; + cs->used_gart = 0; + cs->used_vram = 0; + memset(cs->is_handle_added, 0, sizeof(cs->is_handle_added)); +} + +static void radeon_drm_cs_destroy(struct r300_winsys_cs *rcs) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + FREE(cs->relocs_bo); + FREE(cs->relocs); + FREE(cs); +} + +static void radeon_drm_cs_set_flush(struct r300_winsys_cs *rcs, + void (*flush)(void *), void *user) +{ + struct radeon_drm_cs *cs = radeon_drm_cs(rcs); + cs->flush_cs = flush; + cs->flush_data = user; +} + +void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws) +{ + ws->base.cs_create = radeon_drm_cs_create; + ws->base.cs_destroy = radeon_drm_cs_destroy; + ws->base.cs_add_reloc = radeon_drm_cs_add_reloc; + ws->base.cs_validate = radeon_drm_cs_validate; + ws->base.cs_write_reloc = radeon_drm_cs_write_reloc; + ws->base.cs_flush = radeon_drm_cs_emit; + ws->base.cs_set_flush = radeon_drm_cs_set_flush; +} diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h new file mode 100644 index 00000000000..76046534b65 --- /dev/null +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -0,0 +1,42 @@ +#ifndef RADEON_DRM_CS_H +#define RADEON_DRM_CS_H + +#include "radeon_winsys.h" +#include <radeon_drm.h> + +struct radeon_drm_cs { + struct r300_winsys_cs base; + + /* The winsys. */ + struct radeon_drm_winsys *ws; + + /* Flush CS. */ + void (*flush_cs)(void *); + void *flush_data; + + /* Relocs. */ + unsigned crelocs; + unsigned nrelocs; + struct drm_radeon_cs_reloc *relocs; + struct radeon_bo **relocs_bo; + struct drm_radeon_cs cs; + struct drm_radeon_cs_chunk chunks[2]; + + unsigned used_vram; + unsigned used_gart; + + /* 0 = BO not added, 1 = BO added */ + char is_handle_added[256]; + struct drm_radeon_cs_reloc *relocs_hashlist[256]; + unsigned reloc_indices_hashlist[256]; +}; + +static INLINE struct radeon_drm_cs * +radeon_drm_cs(struct r300_winsys_cs *base) +{ + return (struct radeon_drm_cs*)base; +} + +void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws); + +#endif diff --git a/src/gallium/winsys/radeon/drm/radeon_r300.c b/src/gallium/winsys/radeon/drm/radeon_r300.c index ae7020a0639..bacf181b47c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_r300.c +++ b/src/gallium/winsys/radeon/drm/radeon_r300.c @@ -25,7 +25,6 @@ #include "util/u_memory.h" #include "pipebuffer/pb_bufmgr.h" -#include "radeon_cs_gem.h" #include "state_tracker/drm_driver.h" static unsigned get_pb_usage_from_create_flags(unsigned bind, unsigned usage, @@ -134,65 +133,6 @@ static boolean radeon_r300_winsys_buffer_get_handle(struct r300_winsys_screen *r return radeon_drm_bufmgr_get_handle(_buf, whandle); } -static void radeon_r300_winsys_cs_set_flush(struct r300_winsys_cs *rcs, - void (*flush)(void *), - void *user) -{ - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - cs->flush_cs = flush; - cs->flush_data = user; - radeon_cs_space_set_flush(cs->cs, flush, user); -} - -static boolean radeon_r300_winsys_cs_validate(struct r300_winsys_cs *rcs) -{ - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - - return radeon_cs_space_check(cs->cs) >= 0; -} - -static void radeon_r300_winsys_cs_reset_buffers(struct r300_winsys_cs *rcs) -{ - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - radeon_cs_space_reset_bos(cs->cs); -} - -static void radeon_r300_winsys_cs_flush(struct r300_winsys_cs *rcs) -{ - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - int retval; - - /* Don't flush a zero-sized CS. */ - if (!cs->base.cdw) { - return; - } - - cs->cs->cdw = cs->base.cdw; - - radeon_drm_bufmgr_flush_maps(cs->ws->kman); - - /* Emit the CS. */ - retval = radeon_cs_emit(cs->cs); - if (retval) { - if (debug_get_bool_option("RADEON_DUMP_CS", FALSE)) { - fprintf(stderr, "radeon: The kernel rejected CS, dumping...\n"); - radeon_cs_print(cs->cs, stderr); - } else { - fprintf(stderr, "radeon: The kernel rejected CS, " - "see dmesg for more information.\n"); - } - } - - /* Reset CS. - * Someday, when we care about performance, we should really find a way - * to rotate between two or three CS objects so that the GPU can be - * spinning through one CS while another one is being filled. */ - radeon_cs_erase(cs->cs); - - cs->base.buf = cs->cs->packets; - cs->base.cdw = cs->cs->cdw; -} - static uint32_t radeon_get_value(struct r300_winsys_screen *rws, enum r300_value_id id) { @@ -219,39 +159,6 @@ static uint32_t radeon_get_value(struct r300_winsys_screen *rws, return 0; } -static struct r300_winsys_cs *radeon_r300_winsys_cs_create(struct r300_winsys_screen *rws) -{ - struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); - struct radeon_drm_cs *cs = CALLOC_STRUCT(radeon_drm_cs); - - if (!cs) - return NULL; - - /* Size limit on IBs is 64 kibibytes. */ - cs->cs = radeon_cs_create(ws->csm, 1024 * 64 / 4); - if (!cs->cs) { - FREE(cs); - return NULL; - } - - radeon_cs_set_limit(cs->cs, - RADEON_GEM_DOMAIN_GTT, ws->gart_size); - radeon_cs_set_limit(cs->cs, - RADEON_GEM_DOMAIN_VRAM, ws->vram_size); - - cs->ws = ws; - cs->base.buf = cs->cs->packets; - cs->base.cdw = cs->cs->cdw; - return &cs->base; -} - -static void radeon_r300_winsys_cs_destroy(struct r300_winsys_cs *rcs) -{ - struct radeon_drm_cs *cs = radeon_drm_cs(rcs); - radeon_cs_destroy(cs->cs); - FREE(cs); -} - void radeon_winsys_init_functions(struct radeon_drm_winsys *ws) { ws->base.get_value = radeon_get_value; @@ -259,10 +166,4 @@ void radeon_winsys_init_functions(struct radeon_drm_winsys *ws) ws->base.buffer_reference = radeon_r300_winsys_buffer_reference; ws->base.buffer_from_handle = radeon_r300_winsys_buffer_from_handle; ws->base.buffer_get_handle = radeon_r300_winsys_buffer_get_handle; - ws->base.cs_create = radeon_r300_winsys_cs_create; - ws->base.cs_destroy = radeon_r300_winsys_cs_destroy; - ws->base.cs_validate = radeon_r300_winsys_cs_validate; - ws->base.cs_flush = radeon_r300_winsys_cs_flush; - ws->base.cs_reset_buffers = radeon_r300_winsys_cs_reset_buffers; - ws->base.cs_set_flush = radeon_r300_winsys_cs_set_flush; } diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 6f232143f6a..492edfef8c3 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -55,31 +55,8 @@ struct radeon_drm_winsys { boolean drm_2_8_0; /* Hyper-Z user */ boolean hyperz; - - /* Radeon CS manager. */ - struct radeon_cs_manager *csm; }; -struct radeon_drm_cs { - struct r300_winsys_cs base; - - /* The winsys. */ - struct radeon_drm_winsys *ws; - - /* The libdrm command stream. */ - struct radeon_cs *cs; - - /* Flush CS. */ - void (*flush_cs)(void *); - void *flush_data; -}; - -static INLINE struct radeon_drm_cs * -radeon_drm_cs(struct r300_winsys_cs *base) -{ - return (struct radeon_drm_cs*)base; -} - static INLINE struct radeon_drm_winsys * radeon_drm_winsys(struct r300_winsys_screen *base) { |