summaryrefslogtreecommitdiffstats
path: root/src/gallium/auxiliary/vl/vl_idct.c
diff options
context:
space:
mode:
authorChristian König <[email protected]>2011-05-26 01:50:44 +0200
committerChristian König <[email protected]>2011-05-29 20:07:57 +0200
commitae56a1dd67040dc5d53f4a1622f775462f0fec05 (patch)
tree4fb1c7576ae99f183b68577f6cf5c2dae70a7d5c /src/gallium/auxiliary/vl/vl_idct.c
parentb4837a53d365bd1bd5f50b3fb40910eeaac869a5 (diff)
[g3dvl] implement mismatch control inside idct shaders
Diffstat (limited to 'src/gallium/auxiliary/vl/vl_idct.c')
-rw-r--r--src/gallium/auxiliary/vl/vl_idct.c210
1 files changed, 190 insertions, 20 deletions
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 602258ece86..45180499e2e 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -139,6 +139,121 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2
}
static void *
+create_mismatch_vert_shader(struct vl_idct *idct)
+{
+ struct ureg_program *shader;
+ struct ureg_src vrect, vpos;
+ struct ureg_src scale;
+ struct ureg_dst t_tex;
+ struct ureg_dst o_vpos, o_addr[2];
+
+ shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+ if (!shader)
+ return NULL;
+
+ vrect = ureg_DECL_vs_input(shader, VS_I_RECT); /* input is declared, but no instruction below reads vrect */
+ vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+ t_tex = ureg_DECL_temporary(shader);
+
+ o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+ o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
+ o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
+
+ /*
+ * Vertex shader for the mismatch control pass; returns NULL if ureg_create fails.
+ *
+ * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (buffer_width, buffer_height)
+ * o_vpos.xy = vpos * scale + scale
+ * o_vpos.zw = 1.0
+ *
+ * o_addr = calc_addr(vpos * scale, ..., buffer_width / 4)
+ */
+
+ scale = ureg_imm2f(shader,
+ (float)BLOCK_WIDTH / idct->buffer_width,
+ (float)BLOCK_HEIGHT / idct->buffer_height);
+
+ ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale); /* same as (vpos + 1) * scale */
+ ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+
+ ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
+ calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4); /* t_tex is passed for both source arguments */
+
+ ureg_release_temporary(shader, t_tex);
+
+ ureg_END(shader);
+
+ return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void *
+create_mismatch_frag_shader(struct vl_idct *idct)
+{
+ struct ureg_program *shader;
+ /* Fragment shader of the mismatch control pass; returns NULL if ureg_create fails. NOTE(review): presumably the MPEG-2 rule of adjusting the last coefficient when the coefficient sum is even - confirm. */
+ struct ureg_src addr[2];
+
+ struct ureg_dst m[8][2];
+ struct ureg_dst fragment;
+
+ unsigned i;
+
+ shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ if (!shader)
+ return NULL;
+
+ addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+ addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+ fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+ for (i = 0; i < 8; ++i) {
+ m[i][0] = ureg_DECL_temporary(shader);
+ m[i][1] = ureg_DECL_temporary(shader);
+ }
+ /* m[i] receives the address pair increment_addr derives from addr for index i (presumably addr advanced by i rows; the helper is defined outside this hunk) */
+ for (i = 0; i < 8; ++i) {
+ increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
+ }
+ /* fetch through sampler 0 at those addresses; the fetched values overwrite the addresses held in m[i] */
+ for (i = 0; i < 8; ++i) {
+ struct ureg_src s_addr[2] = { ureg_src(m[i][0]), ureg_src(m[i][1]) };
+ fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
+ }
+ /* accumulate the fetched pairs m[1..7] into m[0], separately for both halves */
+ for (i = 1; i < 8; ++i) {
+ ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
+ ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
+ }
+ /* merge both halves, then DP4 reduces |m[0][0]| against a splatted 1 << 14, leaving the scaled sum in every component */
+ ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
+ ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));
+ /* .w is replaced by |m[7][1].w| * (1 << 14); FRC + SGT then turn each component into 1.0 if its fraction is below 0.5, else 0.0 */
+ ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
+ ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
+ ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));
+ /* .w becomes +1/(1 << 15) where it was 1.0 and -1/(1 << 15) where it was 0.0, then is multiplied by the flag in .x */
+ ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
+ ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
+ ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
+ ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));
+ /* output m[7][1] unchanged in xyz; its w component gets the correction term computed above added */
+ ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
+ ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));
+
+ for (i = 0; i < 8; ++i) {
+ ureg_release_temporary(shader, m[i][0]);
+ ureg_release_temporary(shader, m[i][1]);
+ }
+
+ ureg_END(shader);
+
+ return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void *
create_stage1_vert_shader(struct vl_idct *idct)
{
struct ureg_program *shader;
@@ -239,14 +354,14 @@ create_stage1_frag_shader(struct vl_idct *idct)
for (i = 0; i < 4; ++i) {
struct ureg_src s_addr[2] = { ureg_src(l[i][0]), ureg_src(l[i][1]) };
- fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1), false);
+ fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
}
for (i = 0; i < idct->nr_of_render_targets; ++i) {
increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, BLOCK_HEIGHT);
struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
- fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0), false);
+ fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);
for (j = 0; j < 4; ++j) {
matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
@@ -324,8 +439,8 @@ vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
r[0] = ureg_DECL_temporary(shader);
r[1] = ureg_DECL_temporary(shader);
- fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0), false);
- fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1), true);
+ fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
+ fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);
matrix_mul(shader, fragment, l, r);
@@ -338,6 +453,14 @@ vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
static bool
init_shaders(struct vl_idct *idct)
{
+ idct->vs_mismatch = create_mismatch_vert_shader(idct); /* mismatch control shaders are created before the stage 1 shaders */
+ if (!idct->vs_mismatch)
+ goto error_vs_mismatch;
+
+ idct->fs_mismatch = create_mismatch_frag_shader(idct);
+ if (!idct->fs_mismatch)
+ goto error_fs_mismatch;
+
idct->vs = create_stage1_vert_shader(idct);
if (!idct->vs)
goto error_vs;
@@ -352,12 +475,20 @@ error_fs:
idct->pipe->delete_vs_state(idct->pipe, idct->vs);
error_vs:
+ idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch); /* stage 1 VS failed: the mismatch FS already exists and must be released */
+ /* labels unwind in reverse creation order, each releasing only what was created before the failing step */
+error_fs_mismatch:
+ idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); /* mismatch FS failed: only the mismatch VS exists at this point */
+
+error_vs_mismatch:
return false;
}
static void
cleanup_shaders(struct vl_idct *idct)
{
+ idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch); /* mismatch control shaders, created by init_shaders */
+ idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
idct->pipe->delete_vs_state(idct->pipe, idct->vs);
idct->pipe->delete_fs_state(idct->pipe, idct->fs);
}
@@ -373,6 +504,7 @@ init_state(struct vl_idct *idct)
assert(idct);
memset(&rs_state, 0, sizeof(rs_state));
+ rs_state.point_size = 1;
rs_state.gl_rasterization_rules = true;
idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
if (!idct->rs_state)
@@ -442,6 +574,45 @@ cleanup_state(struct vl_idct *idct)
}
static bool
+init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+ struct pipe_resource *tex;
+ struct pipe_surface surf_templ;
+ /* Set up the framebuffer and viewport of the mismatch pass; the render target is a surface of the source texture itself. */
+ assert(idct && buffer);
+
+ tex = buffer->sampler_views.individual.source->texture;
+
+ buffer->fb_state_mismatch.width = tex->width0;
+ buffer->fb_state_mismatch.height = tex->height0;
+ buffer->fb_state_mismatch.nr_cbufs = 1;
+
+ memset(&surf_templ, 0, sizeof(surf_templ));
+ surf_templ.format = tex->format;
+ surf_templ.u.tex.first_layer = 0;
+ surf_templ.u.tex.last_layer = 0;
+ surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET; /* the texture is both sampled from and rendered to */
+ buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);
+
+ buffer->viewport_mismatch.scale[0] = tex->width0;
+ buffer->viewport_mismatch.scale[1] = tex->height0;
+ buffer->viewport_mismatch.scale[2] = 1;
+ buffer->viewport_mismatch.scale[3] = 1;
+ /* returns false when create_surface yielded no surface, so the caller does not continue with a NULL color buffer */
+ return buffer->fb_state_mismatch.cbufs[0] != NULL;
+}
+
+static void
+cleanup_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+ assert(idct && buffer);
+
+ pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL); /* drops the surface that init_source created */
+
+ pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL); /* drops the buffer's reference to the source sampler view */
+}
+
+static bool
init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
struct pipe_resource *tex;
@@ -470,6 +641,8 @@ init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
buffer->viewport.scale[0] = tex->width0;
buffer->viewport.scale[1] = tex->height0;
+ buffer->viewport.scale[2] = 1;
+ buffer->viewport.scale[3] = 1;
return true;
@@ -609,13 +782,11 @@ vl_idct_cleanup(struct vl_idct *idct)
bool
vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
struct pipe_sampler_view *source,
- struct pipe_sampler_view *intermediate,
- struct pipe_surface *destination)
+ struct pipe_sampler_view *intermediate)
{
assert(buffer);
assert(idct);
assert(source);
- assert(destination);
memset(buffer, 0, sizeof(struct vl_idct_buffer));
@@ -624,15 +795,11 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
- if (!init_intermediate(idct, buffer))
+ if (!init_source(idct, buffer)) /* sets up fb_state_mismatch and viewport_mismatch for the mismatch pass */
return false;
- buffer->viewport.scale[2] = 1;
- buffer->viewport.scale[3] = 1;
- buffer->viewport.translate[0] = 0;
- buffer->viewport.translate[1] = 0;
- buffer->viewport.translate[2] = 0;
- buffer->viewport.translate[3] = 0;
+ if (!init_intermediate(idct, buffer)) /* NOTE(review): nothing set up by init_source is released on this path - confirm the caller runs vl_idct_cleanup_buffer after a failure */
+ return false;
return true;
}
@@ -640,13 +807,9 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
void
vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
{
- unsigned i;
-
assert(idct && buffer);
- for(i = 0; i < idct->nr_of_render_targets; ++i)
- pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
-
+ cleanup_source(idct, buffer); /* releases the mismatch pass surface and the source sampler view reference */
cleanup_intermediate(idct, buffer);
}
@@ -659,11 +822,18 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
idct->pipe->bind_blend_state(idct->pipe, idct->blend);
idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
+ idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
+
+ /* mismatch control */
+ idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);
+ idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport_mismatch);
+ idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);
+ idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);
+ util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);
/* first stage */
idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport);
- idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
idct->pipe->bind_vs_state(idct->pipe, idct->vs);
idct->pipe->bind_fs_state(idct->pipe, idct->fs);
util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);