summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Christian König <[email protected]> 2010-12-05 16:46:10 +0100
committer: Christian König <[email protected]> 2010-12-05 16:46:10 +0100
commit: 8e0c05960daa7a38ab7834e6a9e7e0a7a973ac2d (patch)
tree: ffc7d098b210a83f71d3fa0448a22c84276f6413
parent: 9af3c243d958dd5b9802dda321ab980c83cb8cb8 (diff)
[g3dvl] cleanup empty block handling
doing empty block handling in the mc code is indeed faster
-rw-r--r-- src/gallium/auxiliary/vl/vl_idct.c | 92
-rw-r--r-- src/gallium/auxiliary/vl/vl_idct.h | 4
2 files changed, 26 insertions, 70 deletions
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index b65a808a592..a5b2053065e 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -47,11 +47,6 @@
#define NR_RENDER_TARGETS 1
-struct vertex_shader_consts
-{
- struct vertex4f norm;
-};
-
enum VS_INPUT
{
VS_I_RECT,
@@ -80,7 +75,7 @@ static const float const_matrix[8][8] = {
};
static void *
-create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
+create_vert_shader(struct vl_idct *idct)
{
struct ureg_program *shader;
struct ureg_src scale;
@@ -98,6 +93,9 @@ create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+ o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
+ o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
+ o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
/*
* scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
@@ -120,15 +118,9 @@ create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
- if(calc_src_cords) {
- o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
- o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
- o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
-
- ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
- ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
- ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
- }
+ ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
+ ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+ ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
ureg_release_temporary(shader, t_vpos);
@@ -213,7 +205,7 @@ create_transpose_frag_shader(struct vl_idct *idct)
struct ureg_src block, tex, sampler[2];
struct ureg_src start[2];
- struct ureg_dst m[2][2];
+ struct ureg_dst l[2], r[2];
struct ureg_dst tmp, fragment;
shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
@@ -229,20 +221,20 @@ create_transpose_frag_shader(struct vl_idct *idct)
start[0] = ureg_imm1f(shader, 0.0f);
start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
- fetch_four(shader, m[0], block, sampler[0], start[0], block, false, false, transpose->width0);
- fetch_four(shader, m[1], tex, sampler[1], start[1], block, true, false, intermediate->height0);
+ fetch_four(shader, l, block, sampler[0], start[0], block, false, false, transpose->width0);
+ fetch_four(shader, r, tex, sampler[1], start[1], block, true, false, intermediate->height0);
fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
tmp = ureg_DECL_temporary(shader);
- matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), m[0], m[1]);
+ matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), l, r);
ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
ureg_release_temporary(shader, tmp);
- ureg_release_temporary(shader, m[0][0]);
- ureg_release_temporary(shader, m[0][1]);
- ureg_release_temporary(shader, m[1][0]);
- ureg_release_temporary(shader, m[1][1]);
+ ureg_release_temporary(shader, l[0]);
+ ureg_release_temporary(shader, l[1]);
+ ureg_release_temporary(shader, r[0]);
+ ureg_release_temporary(shader, r[1]);
ureg_END(shader);
@@ -325,54 +317,25 @@ create_matrix_frag_shader(struct vl_idct *idct)
return ureg_create_shader_and_destroy(shader, idct->pipe);
}
-static void *
-create_empty_block_frag_shader(struct vl_idct *idct)
-{
- struct ureg_program *shader;
- struct ureg_dst fragment;
-
- shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
- if (!shader)
- return NULL;
-
- fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
- ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
-
- ureg_END(shader);
-
- return ureg_create_shader_and_destroy(shader, idct->pipe);
-}
-
static bool
init_shaders(struct vl_idct *idct)
{
- idct->matrix_vs = create_vert_shader(idct, true);
+ idct->vs = create_vert_shader(idct);
idct->matrix_fs = create_matrix_frag_shader(idct);
-
- idct->transpose_vs = create_vert_shader(idct, true);
idct->transpose_fs = create_transpose_frag_shader(idct);
- idct->eb_vs = create_vert_shader(idct, false);
- idct->eb_fs = create_empty_block_frag_shader(idct);
-
return
- idct->transpose_vs != NULL && idct->transpose_fs != NULL &&
- idct->matrix_vs != NULL && idct->matrix_fs != NULL &&
- idct->eb_vs != NULL && idct->eb_fs != NULL;
+ idct->vs != NULL &&
+ idct->transpose_fs != NULL &&
+ idct->matrix_fs != NULL;
}
static void
cleanup_shaders(struct vl_idct *idct)
{
- idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
- idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
-
- idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
+ idct->pipe->delete_vs_state(idct->pipe, idct->vs);
idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
-
- idct->pipe->delete_vs_state(idct->pipe, idct->eb_vs);
- idct->pipe->delete_fs_state(idct->pipe, idct->eb_fs);
+ idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
}
static bool
@@ -725,31 +688,24 @@ vl_idct_flush(struct vl_idct *idct)
if(num_blocks > 0) {
idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
+ idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
+ idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+ idct->pipe->bind_vs_state(idct->pipe, idct->vs);
/* first stage */
idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[0]);
-
- idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
- idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
- idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
-
util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
/* second stage */
idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);
-
- idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
- idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
- idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
-
util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
}
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 3388d91f353..ccaaf2653de 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -66,8 +66,8 @@ struct vl_idct
} individual;
} sampler_views;
- void *matrix_vs, *transpose_vs, *eb_vs;
- void *matrix_fs, *transpose_fs, *eb_fs;
+ void *vs;
+ void *matrix_fs, *transpose_fs;
union
{