aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Marek Olšák <[email protected]> 2012-12-09 17:56:26 +0100
committer Marek Olšák <[email protected]> 2012-12-12 13:12:28 +0100
commit 8df3855eed67302e83e4b181c4fa02183ccc185a (patch)
tree 51415a2636e6341c093befb37e59a2b09278c991
parent cc2d908572d0ed97171e37e446372ab039ed5422 (diff)
r600g: suballocate memory for the STRMOUT_BUFFER_FILLED_SIZE register
Instead of having a 4-byte buffer for each streamout target, we suballocate each dword from a 4K buffer. This further reduces the overall number of relocations.

Tested-by: Aaron Watry <[email protected]>
Reviewed-by: Alex Deucher <[email protected]>
-rw-r--r-- src/gallium/drivers/r600/r600.h 4
-rw-r--r-- src/gallium/drivers/r600/r600_hw_context.c 8
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.c 8
-rw-r--r-- src/gallium/drivers/r600/r600_pipe.h 2
-rw-r--r-- src/gallium/drivers/r600/r600_state_common.c 22
5 files changed, 28 insertions, 16 deletions
diff --git a/src/gallium/drivers/r600/r600.h b/src/gallium/drivers/r600/r600.h
index 7d434169d9e..d15cd5256fa 100644
--- a/src/gallium/drivers/r600/r600.h
+++ b/src/gallium/drivers/r600/r600.h
@@ -175,7 +175,9 @@ struct r600_so_target {
struct pipe_stream_output_target b;
/* The buffer where BUFFER_FILLED_SIZE is stored. */
- struct r600_resource *filled_size;
+ struct r600_resource *buf_filled_size;
+ unsigned buf_filled_size_offset;
+
unsigned stride_in_dw;
unsigned so_index;
};
diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c
index c8253018446..c7a357e15ed 100644
--- a/src/gallium/drivers/r600/r600_hw_context.c
+++ b/src/gallium/drivers/r600/r600_hw_context.c
@@ -1005,7 +1005,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
if (ctx->streamout_append_bitmask & (1 << i)) {
va = r600_resource_va(&ctx->screen->screen,
- (void*)t[i]->filled_size);
+ (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
/* Append. */
cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
@@ -1017,7 +1017,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] =
- r600_context_bo_reloc(ctx, t[i]->filled_size,
+ r600_context_bo_reloc(ctx, t[i]->buf_filled_size,
RADEON_USAGE_READ);
} else {
/* Start from the beginning. */
@@ -1054,7 +1054,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
for (i = 0; i < ctx->num_so_targets; i++) {
if (t[i]) {
va = r600_resource_va(&ctx->screen->screen,
- (void*)t[i]->filled_size);
+ (void*)t[i]->buf_filled_size) + t[i]->buf_filled_size_offset;
cs->buf[cs->cdw++] = PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0);
cs->buf[cs->cdw++] = STRMOUT_SELECT_BUFFER(i) |
STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
@@ -1066,7 +1066,7 @@ void r600_context_streamout_end(struct r600_context *ctx)
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
cs->buf[cs->cdw++] =
- r600_context_bo_reloc(ctx, t[i]->filled_size,
+ r600_context_bo_reloc(ctx, t[i]->buf_filled_size,
RADEON_USAGE_WRITE);
}
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index deed4ec3c10..90289e582ab 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -185,6 +185,9 @@ static void r600_destroy_context(struct pipe_context *context)
if (rctx->uploader) {
u_upload_destroy(rctx->uploader);
}
+ if (rctx->allocator_so_filled_size) {
+ u_suballocator_destroy(rctx->allocator_so_filled_size);
+ }
util_slab_destroy(&rctx->pool_transfers);
r600_release_command_buffer(&rctx->start_cs_cmd);
@@ -291,6 +294,11 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
if (!rctx->uploader)
goto fail;
+ rctx->allocator_so_filled_size = u_suballocator_create(&rctx->context, 4096, 4,
+ 0, PIPE_USAGE_STATIC, TRUE);
+ if (!rctx->allocator_so_filled_size)
+ goto fail;
+
rctx->blitter = util_blitter_create(&rctx->context);
if (rctx->blitter == NULL)
goto fail;
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index a61a6e8c082..e707a4adda6 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -28,6 +28,7 @@
#include "util/u_blitter.h"
#include "util/u_slab.h"
+#include "util/u_suballoc.h"
#include "r600.h"
#include "r600_llvm.h"
#include "r600_public.h"
@@ -389,6 +390,7 @@ struct r600_context {
struct radeon_winsys_cs *cs;
struct blitter_context *blitter;
struct u_upload_mgr *uploader;
+ struct u_suballocator *allocator_so_filled_size;
struct util_slab_mempool pool_transfers;
/* Hardware info. */
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index b132850f0c7..66120cad27d 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -956,25 +956,25 @@ r600_create_so_target(struct pipe_context *ctx,
{
struct r600_context *rctx = (struct r600_context *)ctx;
struct r600_so_target *t;
- void *ptr;
t = CALLOC_STRUCT(r600_so_target);
if (!t) {
return NULL;
}
+ u_suballocator_alloc(rctx->allocator_so_filled_size, 4,
+ &t->buf_filled_size_offset,
+ (struct pipe_resource**)&t->buf_filled_size);
+ if (!t->buf_filled_size) {
+ FREE(t);
+ return NULL;
+ }
+
t->b.reference.count = 1;
t->b.context = ctx;
pipe_resource_reference(&t->b.buffer, buffer);
t->b.buffer_offset = buffer_offset;
t->b.buffer_size = buffer_size;
-
- t->filled_size = (struct r600_resource*)
- pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_STATIC, 4);
- ptr = rctx->ws->buffer_map(t->filled_size->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
- memset(ptr, 0, t->filled_size->buf->size);
- rctx->ws->buffer_unmap(t->filled_size->cs_buf);
-
return &t->b;
}
@@ -983,7 +983,7 @@ static void r600_so_target_destroy(struct pipe_context *ctx,
{
struct r600_so_target *t = (struct r600_so_target*)target;
pipe_resource_reference(&t->b.buffer, NULL);
- pipe_resource_reference((struct pipe_resource**)&t->filled_size, NULL);
+ pipe_resource_reference((struct pipe_resource**)&t->buf_filled_size, NULL);
FREE(t);
}
@@ -1308,7 +1308,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
} else {
if (info.count_from_stream_output) {
struct r600_so_target *t = (struct r600_so_target*)info.count_from_stream_output;
- uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->filled_size);
+ uint64_t va = r600_resource_va(&rctx->screen->screen, (void*)t->buf_filled_size) + t->buf_filled_size_offset;
r600_write_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw);
@@ -1320,7 +1320,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
cs->buf[cs->cdw++] = 0; /* unused */
cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
- cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->filled_size, RADEON_USAGE_READ);
+ cs->buf[cs->cdw++] = r600_context_bo_reloc(rctx, t->buf_filled_size, RADEON_USAGE_READ);
}
cs->buf[cs->cdw++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1, rctx->predicate_drawing);