summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
author Christian König <[email protected]> 2010-11-28 14:48:31 +0100
committer Christian König <[email protected]> 2010-12-03 19:04:00 +0100
commit 4abe7382882a451a7750ccc451b8568768d122cb (patch)
tree 273068eec7f3f16aa6fde6ac89e2f2e2f0e30e48 /src/gallium
parent a984c67b316ac2ca9aaf6d38a3127cf3d61a249e (diff)
use a shadow buffer for vertex data to optimize memory access
Diffstat (limited to 'src/gallium')
-rw-r--r-- src/gallium/auxiliary/Makefile 3
-rw-r--r-- src/gallium/auxiliary/vl/vl_idct.c 165
-rw-r--r-- src/gallium/auxiliary/vl/vl_idct.h 14
-rw-r--r-- src/gallium/auxiliary/vl/vl_types.h 5
-rw-r--r-- src/gallium/auxiliary/vl/vl_vertex_buffers.c 116
-rw-r--r-- src/gallium/auxiliary/vl/vl_vertex_buffers.h 75
6 files changed, 249 insertions, 129 deletions
diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index bf90a704dd4..997478f0664 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -147,7 +147,8 @@ C_SOURCES = \
vl/vl_mpeg12_mc_renderer.c \
vl/vl_compositor.c \
vl/vl_csc.c \
- vl/vl_idct.c
+ vl/vl_idct.c \
+ vl/vl_vertex_buffers.c
GALLIVM_SOURCES = \
gallivm/lp_bld_arit.c \
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 2466d5b751b..22feff8d8d4 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -26,6 +26,7 @@
**************************************************************************/
#include "vl_idct.h"
+#include "vl_vertex_buffers.h"
#include "util/u_draw.h"
#include <assert.h>
#include <pipe/p_context.h>
@@ -78,11 +79,6 @@ static const float const_matrix[8][8] = {
{ 0.0975451f, -0.2777850f, 0.4157350f, -0.4903930f, 0.4903930f, -0.4157350f, 0.277786f, -0.0975458f }
};
-/* vertices for a quad covering a block */
-static const struct vertex2f const_quad[4] = {
- {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
-};
-
static void *
create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
{
@@ -409,11 +405,6 @@ init_buffers(struct vl_idct *idct)
struct pipe_vertex_element vertex_elems[2];
unsigned i;
- idct->max_blocks =
- align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
- align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
- idct->destination->depth0;
-
memset(&template, 0, sizeof(struct pipe_resource));
template.last_level = 0;
template.depth0 = 1;
@@ -443,15 +434,7 @@ init_buffers(struct vl_idct *idct)
idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
}
- idct->vertex_bufs.individual.quad.stride = sizeof(struct vertex2f);
- idct->vertex_bufs.individual.quad.max_index = 4 * idct->max_blocks - 1;
- idct->vertex_bufs.individual.quad.buffer_offset = 0;
- idct->vertex_bufs.individual.quad.buffer = pipe_buffer_create
- (
- idct->pipe->screen,
- PIPE_BIND_VERTEX_BUFFER,
- sizeof(struct vertex2f) * 4 * idct->max_blocks
- );
+ idct->vertex_bufs.individual.quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks);
if(idct->vertex_bufs.individual.quad.buffer == NULL)
return false;
@@ -504,35 +487,11 @@ cleanup_buffers(struct vl_idct *idct)
}
static void
-init_constants(struct vl_idct *idct)
-{
- struct pipe_transfer *buf_transfer;
- struct vertex2f *v;
-
- unsigned i;
-
- /* quad vectors */
- v = pipe_buffer_map
- (
- idct->pipe,
- idct->vertex_bufs.individual.quad.buffer,
- PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
- &buf_transfer
- );
- for ( i = 0; i < idct->max_blocks; ++i)
- memcpy(v + i * 4, &const_quad, sizeof(const_quad));
- pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
-}
-
-static void
init_state(struct vl_idct *idct)
{
struct pipe_sampler_state sampler;
unsigned i;
- idct->num_blocks = 0;
- idct->num_empty_blocks = 0;
-
idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0;
idct->viewport[0].scale[1] = idct->textures.individual.intermediate->height0;
@@ -674,26 +633,11 @@ xfer_buffers_map(struct vl_idct *idct)
);
idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
-
- idct->vectors = pipe_buffer_map
- (
- idct->pipe,
- idct->vertex_bufs.individual.pos.buffer,
- PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
- &idct->vec_transfer
- );
-
- idct->next_empty_block.l_x = ~1;
- idct->next_empty_block.l_y = ~1;
- idct->next_empty_block.r_x = ~1;
- idct->next_empty_block.r_y = ~1;
}
static void
xfer_buffers_unmap(struct vl_idct *idct)
{
- pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, idct->vec_transfer);
-
idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
}
@@ -708,6 +652,11 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
pipe_resource_reference(&idct->textures.individual.transpose, matrix);
pipe_resource_reference(&idct->destination, dst);
+ idct->max_blocks =
+ align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
+ align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
+ idct->destination->depth0;
+
if(!init_buffers(idct))
return false;
@@ -716,9 +665,21 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
return false;
}
+ if(!vl_vb_init(&idct->blocks, idct->max_blocks)) {
+ cleanup_shaders(idct);
+ cleanup_buffers(idct);
+ return false;
+ }
+
+ if(!vl_vb_init(&idct->empty_blocks, idct->max_blocks)) {
+ vl_vb_cleanup(&idct->blocks);
+ cleanup_shaders(idct);
+ cleanup_buffers(idct);
+ return false;
+ }
+
init_state(idct);
- init_constants(idct);
xfer_buffers_map(idct);
return true;
@@ -727,6 +688,8 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
void
vl_idct_cleanup(struct vl_idct *idct)
{
+ vl_vb_cleanup(&idct->blocks);
+ vl_vb_cleanup(&idct->empty_blocks);
cleanup_shaders(idct);
cleanup_buffers(idct);
@@ -735,43 +698,9 @@ vl_idct_cleanup(struct vl_idct *idct)
pipe_resource_reference(&idct->destination, NULL);
}
-static void
-flush_empty_block(struct vl_idct *idct, unsigned new_x, unsigned new_y)
-{
- if (idct->next_empty_block.l_x == ~1 ||
- idct->next_empty_block.l_y == ~1) {
-
- idct->next_empty_block.l_x = new_x;
- idct->next_empty_block.l_y = new_y;
-
- } else if (idct->next_empty_block.r_x != (new_x - 1) ||
- idct->next_empty_block.r_y != new_y) {
-
- struct vertex2f l, r, *v_dst;
-
- v_dst = idct->vectors + (idct->max_blocks - idct->num_empty_blocks) * 4 - 4;
-
- l.x = idct->next_empty_block.l_x;
- l.y = idct->next_empty_block.l_y;
- r.x = idct->next_empty_block.r_x;
- r.y = idct->next_empty_block.r_y;
- v_dst[0] = v_dst[3] = l;
- v_dst[1] = v_dst[2] = r;
-
- idct->next_empty_block.l_x = new_x;
- idct->next_empty_block.l_y = new_y;
- idct->num_empty_blocks++;
- }
-
- idct->next_empty_block.r_x = new_x;
- idct->next_empty_block.r_y = new_y;
-}
-
void
vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
{
- struct vertex2f v, *v_dst;
-
unsigned tex_pitch;
short *texels;
@@ -786,32 +715,38 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
for (i = 0; i < BLOCK_HEIGHT; ++i)
memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
- /* non empty blocks fills the vector buffer from left to right */
- v_dst = idct->vectors + idct->num_blocks * 4;
-
- idct->num_blocks++;
-
- v.x = x;
- v.y = y;
-
- for (i = 0; i < 4; ++i) {
- v_dst[i] = v;
- }
-
+ vl_vb_add_block(&idct->blocks, false, x, y);
} else {
- /* while empty blocks fills the vector buffer from right to left */
- flush_empty_block(idct, x, y);
+ vl_vb_add_block(&idct->empty_blocks, true, x, y);
}
}
void
vl_idct_flush(struct vl_idct *idct)
{
- flush_empty_block(idct, ~1, ~1);
+ struct pipe_transfer *vec_transfer;
+ struct quadf *vectors;
+ unsigned num_blocks, num_empty_blocks;
+
+ assert(idct);
+
+ vectors = pipe_buffer_map
+ (
+ idct->pipe,
+ idct->vertex_bufs.individual.pos.buffer,
+ PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+ &vec_transfer
+ );
+
+ num_blocks = vl_vb_upload(&idct->blocks, vectors);
+ num_empty_blocks = vl_vb_upload(&idct->empty_blocks, vectors + num_blocks);
+
+ pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, vec_transfer);
+
xfer_buffers_unmap(idct);
- if(idct->num_blocks > 0) {
+ if(num_blocks > 0) {
/* first stage */
idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
@@ -824,7 +759,7 @@ vl_idct_flush(struct vl_idct *idct)
idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
- util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+ util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
/* second stage */
idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
@@ -837,10 +772,10 @@ vl_idct_flush(struct vl_idct *idct)
idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
- util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+ util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
}
- if(idct->num_empty_blocks > 0) {
+ if(num_empty_blocks > 0) {
/* empty block handling */
idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
@@ -851,12 +786,8 @@ vl_idct_flush(struct vl_idct *idct)
idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs);
idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);
- util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS,
- (idct->max_blocks - idct->num_empty_blocks) * 4,
- idct->num_empty_blocks * 4);
+ util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, num_blocks * 4, num_empty_blocks * 4);
}
- idct->num_blocks = 0;
- idct->num_empty_blocks = 0;
xfer_buffers_map(idct);
}
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index c26f5cb8c09..94a5c73977f 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -29,6 +29,7 @@
#define vl_idct_h
#include <pipe/p_state.h>
+#include "vl_vertex_buffers.h"
struct vl_idct
{
@@ -82,20 +83,11 @@ struct vl_idct
struct { struct pipe_vertex_buffer quad, pos; } individual;
} vertex_bufs;
- unsigned num_blocks;
-
- struct
- {
- unsigned l_x, l_y, r_x, r_y;
- } next_empty_block;
-
- unsigned num_empty_blocks;
+ struct vl_vertex_buffer blocks;
+ struct vl_vertex_buffer empty_blocks;
struct pipe_transfer *tex_transfer;
short *texels;
-
- struct pipe_transfer *vec_transfer;
- struct vertex2f *vectors;
};
struct pipe_resource *vl_idct_upload_matrix(struct pipe_context *pipe);
diff --git a/src/gallium/auxiliary/vl/vl_types.h b/src/gallium/auxiliary/vl/vl_types.h
index ce175546894..eeabd43cb23 100644
--- a/src/gallium/auxiliary/vl/vl_types.h
+++ b/src/gallium/auxiliary/vl/vl_types.h
@@ -38,4 +38,9 @@ struct vertex4f
float x, y, z, w;
};
+/*
+ * Four 2D vertices describing one quad.
+ * The member names presumably abbreviate bottom-left, top-left, top-right
+ * and bottom-right -- confirm against the shaders consuming this layout.
+ */
+struct quadf
+{
+ struct vertex2f bl, tl, tr, br;
+};
+
#endif /* vl_types_h */
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
new file mode 100644
index 00000000000..6df11db0aef
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -0,0 +1,116 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+#include <pipe/p_context.h>
+#include <pipe/p_screen.h>
+#include <util/u_memory.h>
+#include <util/u_inlines.h>
+#include "vl_vertex_buffers.h"
+#include "vl_types.h"
+
+/*
+ * vertices for a quad covering a block
+ *
+ * The initializers are listed in struct quadf member order (bl, tl, tr, br),
+ * i.e. bl = (0,1), tl = (0,0), tr = (1,0), br = (1,1).
+ */
+static const struct quadf const_quad = {
+ {0.0f, 1.0f}, {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}
+};
+
+/**
+ * Create a vertex buffer filled with max_blocks copies of the unit quad.
+ *
+ * @param pipe        context used to create and map the buffer
+ * @param max_blocks  number of quads to store; must be non-zero
+ * @return a vertex buffer description whose stride is one vertex2f and whose
+ *         max_index covers 4 * max_blocks vertices.  On failure (buffer
+ *         creation or mapping) the returned description has buffer == NULL,
+ *         which is what callers are expected to test.
+ */
+struct pipe_vertex_buffer
+vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
+{
+   struct pipe_vertex_buffer quad;
+   struct pipe_transfer *buf_transfer;
+   struct quadf *v;
+
+   unsigned i;
+
+   assert(pipe);
+   assert(max_blocks);
+
+   /* create buffer */
+   quad.stride = sizeof(struct vertex2f);
+   quad.max_index = 4 * max_blocks - 1;
+   quad.buffer_offset = 0;
+   quad.buffer = pipe_buffer_create
+   (
+      pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      sizeof(struct vertex2f) * 4 * max_blocks
+   );
+
+   if(!quad.buffer)
+      return quad;
+
+   /* and fill it */
+   v = pipe_buffer_map
+   (
+      pipe,
+      quad.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   if(!v) {
+      /*
+       * Mapping failed: drop our reference so quad.buffer becomes NULL and
+       * the caller's existing NULL check reports the error instead of us
+       * writing through a NULL pointer below.
+       */
+      pipe_resource_reference(&quad.buffer, NULL);
+      return quad;
+   }
+
+   /* one struct quadf is exactly the four vertices of one block */
+   for ( i = 0; i < max_blocks; ++i)
+      v[i] = const_quad;
+
+   pipe_buffer_unmap(pipe, quad.buffer, buf_transfer);
+
+   return quad;
+}
+
+/**
+ * Prepare a shadow vertex buffer: allocate room for max_blocks quads and
+ * mark it as empty.
+ *
+ * @return true when the storage was allocated, false otherwise
+ */
+bool
+vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks)
+{
+   assert(buffer);
+
+   buffer->blocks = MALLOC(max_blocks * sizeof(struct quadf));
+   buffer->num_blocks = 0;
+
+   if(buffer->blocks == NULL)
+      return false;
+
+   return true;
+}
+
+/**
+ * Copy every quad collected so far into dst and empty the shadow buffer.
+ *
+ * @param buffer  shadow buffer to drain
+ * @param dst     destination with room for at least buffer->num_blocks quads
+ * @return number of quads that were copied
+ */
+unsigned
+vl_vb_upload(struct vl_vertex_buffer *buffer, struct quadf *dst)
+{
+   unsigned count;
+
+   assert(buffer);
+
+   /* remember how much is pending, then reset for the next round */
+   count = buffer->num_blocks;
+   buffer->num_blocks = 0;
+
+   if(count == 0)
+      return 0;
+
+   memcpy(dst, buffer->blocks, count * sizeof(struct quadf));
+   return count;
+}
+
+/**
+ * Release the quad storage owned by a shadow vertex buffer.
+ *
+ * The pointer and the block count are reset after freeing so that no
+ * dangling pointer is left behind in the structure.
+ */
+void
+vl_vb_cleanup(struct vl_vertex_buffer *buffer)
+{
+   assert(buffer);
+
+   FREE(buffer->blocks);
+   buffer->blocks = NULL;
+   buffer->num_blocks = 0;
+}
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
new file mode 100644
index 00000000000..43ddc342d3d
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -0,0 +1,75 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#ifndef vl_vertex_buffers_h
+#define vl_vertex_buffers_h
+
+#include <assert.h>
+#include <pipe/p_state.h>
+#include "vl_types.h"
+
+/* CPU-side list of quads that is filled block by block and later copied out by vl_vb_upload */
+struct vl_vertex_buffer
+{
+ /* number of quads currently stored in blocks; incremented by vl_vb_add_block, reset by vl_vb_upload */
+ unsigned num_blocks;
+ /* storage allocated by vl_vb_init and released by vl_vb_cleanup */
+ struct quadf *blocks;
+};
+
+/* create a vertex buffer holding max_blocks copies of the unit quad; the returned buffer member is NULL if creation failed */
+struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks);
+
+/* allocate storage for max_blocks quads and reset the block count; returns false if the allocation fails */
+bool vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks);
+
+/**
+ * Record the block at position (x, y) in the shadow buffer.
+ *
+ * When allow_merge is set and the most recently stored quad ends at
+ * (x - 1, y), that quad's tr/br corners are moved to (x, y) instead of
+ * storing a new quad.  Otherwise a new quad with all four corners at
+ * (x, y) is appended.  No capacity check is done here.
+ *
+ * @return true if the block was merged into the previous quad,
+ *         false if a new quad was appended
+ */
+static inline bool
+vl_vb_add_block(struct vl_vertex_buffer *buffer, bool allow_merge, signed x, signed y)
+{
+   struct quadf *quad;
+
+   assert(buffer);
+
+   /* merging needs a previously stored quad to extend */
+   if (allow_merge && buffer->num_blocks > 0) {
+      struct quadf *last = &buffer->blocks[buffer->num_blocks - 1];
+
+      if (last->tr.x == (x - 1) && last->br.x == (x - 1) &&
+          last->tr.y == y && last->br.y == y) {
+
+         last->tr.x = x;
+         last->br.x = x;
+         last->tr.y = y;
+         last->br.y = y;
+         return true;
+      }
+   }
+
+   /* append a fresh quad with every corner at the block position */
+   quad = &buffer->blocks[buffer->num_blocks];
+
+   quad->bl.x = x;
+   quad->tl.x = x;
+   quad->tr.x = x;
+   quad->br.x = x;
+
+   quad->bl.y = y;
+   quad->tl.y = y;
+   quad->tr.y = y;
+   quad->br.y = y;
+
+   buffer->num_blocks++;
+   return false;
+}
+
+/* copy all stored quads to dst, reset the buffer to empty and return the number of quads copied */
+unsigned vl_vb_upload(struct vl_vertex_buffer *buffer, struct quadf *dst);
+
+/* free the quad storage allocated by vl_vb_init */
+void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
+
+#endif