6 files changed, 848 insertions, 111 deletions
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index f262c13e0fc..3b1d26d3db8 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -44,6 +44,14 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {
    { { 0x01, 0x01 },  { 0x01, 0x01 } }
 };
 
+static const enum pipe_format const_zscan_source_formats[] = { /* candidates handed to find_first_supported_format() by init_zscan() */
+   PIPE_FORMAT_R16_SNORM,
+   PIPE_FORMAT_R16_SSCALED
+};
+
+static const unsigned num_zscan_source_formats = /* number of entries in const_zscan_source_formats */
+   sizeof(const_zscan_source_formats) / sizeof(enum pipe_format);
+
 static const enum pipe_format const_idct_source_formats[] = {
    PIPE_FORMAT_R16G16B16A16_SNORM,
    PIPE_FORMAT_R16G16B16A16_SSCALED
@@ -79,10 +87,8 @@ map_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
 
    assert(ctx && buffer);
 
-   if (ctx->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      sampler_views = buffer->idct_source->get_sampler_views(buffer->idct_source);
-   else
-      sampler_views = buffer->mc_source->get_sampler_views(buffer->mc_source);
+   sampler_views = buffer->zscan_source->get_sampler_views(buffer->zscan_source);
+
    assert(sampler_views);
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
@@ -112,21 +118,17 @@ upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane,
              unsigned x, unsigned y, short *block,
              bool intra, enum pipe_mpeg12_dct_type type)
 {
-   unsigned tex_pitch;
    short *texels;
-
-   unsigned i;
+   unsigned idx;
 
    assert(buffer);
    assert(block);
 
-   vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type);
+   idx = vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type);
 
-   tex_pitch = buffer->tex_transfer[plane]->stride / sizeof(short);
-   texels = buffer->texels[plane] + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
+   texels = buffer->texels[plane] + idx * BLOCK_WIDTH * BLOCK_HEIGHT;
 
-   for (i = 0; i < BLOCK_HEIGHT; ++i)
-      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
+   memcpy(texels, block, BLOCK_WIDTH * BLOCK_HEIGHT * sizeof(short));
 }
 
 static void
@@ -178,6 +180,144 @@ unmap_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
    }
 }
 
+static bool
+init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
+{
+   enum pipe_format formats[3];
+
+   struct pipe_sampler_view **source;
+   struct pipe_surface **destination;
+
+   struct vl_mpeg12_decoder *dec;
+
+   unsigned i;
+
+   assert(buffer);
+   /* Creates buffer->zscan_source and one zscan pass per plane; returns false with everything undone on failure. */
+   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+   /* all planes share the decoder's zscan source format; the buffer is always allocated as 4:4:4 */
+   formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
+   buffer->zscan_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                               dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
+                                               dec->max_blocks / dec->blocks_per_line,
+                                               1, PIPE_VIDEO_CHROMA_FORMAT_444,
+                                               formats, PIPE_USAGE_STATIC);
+   if (!buffer->zscan_source)
+      goto error_source;
+
+   source = buffer->zscan_source->get_sampler_views(buffer->zscan_source);
+   if (!source)
+      goto error_sampler;
+   /* render into the IDCT source when entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT, otherwise into the MC source */
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      destination = buffer->idct_source->get_surfaces(buffer->idct_source);
+   else
+      destination = buffer->mc_source->get_surfaces(buffer->mc_source);
+
+   if (!destination)
+      goto error_surface;
+   /* plane 0 uses the luma zscan, every other plane the chroma zscan */
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c,
+                                &buffer->zscan[i], source[i], destination[i]))
+         goto error_plane;
+
+   return true;
+
+error_plane:
+   for (; i > 0; --i) /* plane i failed, so only planes [0, i) need cleaning up */
+      vl_zscan_cleanup_buffer(&buffer->zscan[i - 1]);
+
+error_surface:
+error_sampler:
+   buffer->zscan_source->destroy(buffer->zscan_source);
+
+error_source:
+   return false;
+}
+
+static void
+cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer)
+{
+   unsigned i;
+
+   assert(buffer);
+   /* Counterpart of init_zscan_buffer(): clean up every plane's zscan buffer, then destroy the source video buffer. */
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      vl_zscan_cleanup_buffer(&buffer->zscan[i]);
+   buffer->zscan_source->destroy(buffer->zscan_source);
+}
+
+static bool
+init_idct_buffer(struct vl_mpeg12_buffer *buffer)
+{
+   enum pipe_format formats[3];
+
+   struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv;
+   struct pipe_surface **idct_surfaces;
+
+   struct vl_mpeg12_decoder *dec;
+
+   unsigned i;
+
+   assert(buffer);
+   /* Creates the IDCT source and intermediate video buffers plus one idct buffer per plane; false on failure. */
+   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+
+   formats[0] = formats[1] = formats[2] = dec->idct_source_format;
+   buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                              dec->base.width / 4, dec->base.height, 1,
+                                              dec->base.chroma_format,
+                                              formats, PIPE_USAGE_STATIC);
+   if (!buffer->idct_source)
+      goto error_source;
+
+   formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format;
+   buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                                    dec->base.width / dec->nr_of_idct_render_targets,
+                                                    dec->base.height / 4, dec->nr_of_idct_render_targets,
+                                                    dec->base.chroma_format,
+                                                    formats, PIPE_USAGE_STATIC);
+
+   if (!buffer->idct_intermediate)
+      goto error_intermediate;
+
+   idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source);
+   if (!idct_source_sv)
+      goto error_source_sv;
+
+   idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate);
+   if (!idct_intermediate_sv)
+      goto error_intermediate_sv;
+   /* the IDCT stage renders into the surfaces of the MC source buffer */
+   idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
+   if (!idct_surfaces)
+      goto error_surfaces;
+   /* plane 0 belongs to the luma idct, planes 1 and 2 to the chroma idct */
+   for (i = 0; i < 3; ++i)
+      if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
+                               &buffer->idct[i], idct_source_sv[i],
+                               idct_intermediate_sv[i], idct_surfaces[i]))
+         goto error_plane;
+
+   return true;
+
+error_plane:
+   for (; i > 0; --i) /* cleans plane i - 1, so i == 1 means plane 0, i.e. the luma idct */
+      vl_idct_cleanup_buffer(i == 1 ? &dec->idct_y : &dec->idct_c, &buffer->idct[i - 1]);
+
+error_surfaces:
+error_intermediate_sv:
+error_source_sv:
+   buffer->idct_intermediate->destroy(buffer->idct_intermediate);
+
+error_intermediate:
+   buffer->idct_source->destroy(buffer->idct_source);
+
+error_source:
+   return false;
+}
+
 static void
 cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
 {
@@ -187,11 +327,11 @@ cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
    assert(dec);
 
-   buf->idct_source->destroy(buf->idct_source);
-   buf->idct_intermediate->destroy(buf->idct_intermediate);
    vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
+   buf->idct_source->destroy(buf->idct_source);
+   buf->idct_intermediate->destroy(buf->idct_intermediate);
 }
 
 static void
@@ -206,6 +346,8 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
    assert(dec);
 
+   cleanup_zscan_buffer(buf);
+
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
       cleanup_idct_buffer(buf);
 
@@ -310,6 +452,9 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
       vl_idct_cleanup(&dec->idct_c);
    }
 
+   vl_zscan_cleanup(&dec->zscan_y);
+   vl_zscan_cleanup(&dec->zscan_c);
+
    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv);
 
@@ -319,76 +464,6 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    FREE(dec);
 }
 
-static bool
-init_idct_buffer(struct vl_mpeg12_buffer *buffer)
-{
-   enum pipe_format formats[3];
-
-   struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv;
-   struct pipe_surface **idct_surfaces;
-
-   struct vl_mpeg12_decoder *dec;
-
-   unsigned i;
-
-   assert(buffer);
-
-   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
-
-   formats[0] = formats[1] = formats[2] = dec->idct_source_format;
-   buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                              dec->base.width / 4, dec->base.height, 1,
-                                              dec->base.chroma_format,
-                                              formats, PIPE_USAGE_STREAM);
-   if (!buffer->idct_source)
-      goto error_source;
-
-   formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format;
-   buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                                    dec->base.width / dec->nr_of_idct_render_targets,
-                                                    dec->base.height / 4, dec->nr_of_idct_render_targets,
-                                                    dec->base.chroma_format,
-                                                    formats, PIPE_USAGE_STATIC);
-
-   if (!buffer->idct_intermediate)
-      goto error_intermediate;
-
-   idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source);
-   if (!idct_source_sv)
-      goto error_source_sv;
-
-   idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate);
-   if (!idct_intermediate_sv)
-      goto error_intermediate_sv;
-
-   idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
-   if (!idct_surfaces)
-      goto error_surfaces;
-
-   for (i = 0; i < 3; ++i)
-      if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
-                               &buffer->idct[i], idct_source_sv[i],
-                               idct_intermediate_sv[i], idct_surfaces[i]))
-         goto error_plane;
-
-   return true;
-
-error_plane:
-   for (; i > 0; --i)
-      vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
-
-error_surfaces:
-error_intermediate_sv:
-error_source_sv:
-   buffer->idct_intermediate->destroy(buffer->idct_intermediate);
-
-error_intermediate:
-   buffer->idct_source->destroy(buffer->idct_source);
-
-error_source:
-   return false;
-}
-
 static struct pipe_video_decode_buffer *
 vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
 {
@@ -426,10 +501,6 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    if (!buffer->mc_source)
       goto error_mc_source;
 
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      if (!init_idct_buffer(buffer))
-         goto error_idct;
-
    mc_source_sv = buffer->mc_source->get_sampler_views(buffer->mc_source);
    if (!mc_source_sv)
       goto error_mc_source_sv;
@@ -443,8 +514,18 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    if(!vl_mc_init_buffer(&dec->mc_c, &buffer->mc[2], mc_source_sv[2]))
       goto error_mc_cr;
 
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      if (!init_idct_buffer(buffer))
+         goto error_idct;
+
+   if (!init_zscan_buffer(buffer))
+      goto error_zscan;
+
    return &buffer->base;
 
+error_zscan:
+   // TODO Cleanup error handling
+
 error_mc_cr:
    vl_mc_cleanup_buffer(&buffer->mc[1]);
 
@@ -517,6 +598,8 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
       dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
 
+      vl_zscan_render(&buf->zscan[i] , num_instances);
+
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
          vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_instances);
 
@@ -590,9 +673,47 @@ find_first_supported_format(struct vl_mpeg12_decoder *dec,
 }
 
 static bool
-init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_height)
+init_zscan(struct vl_mpeg12_decoder *dec)
+{
+   struct pipe_sampler_view *layout;
+   unsigned num_channels;
+   bool ok;
+
+   assert(dec);
+   /* Picks the zscan source format and sets up the luma and chroma zscan stages; false on failure. */
+   dec->blocks_per_line = 4;
+   dec->max_blocks =
+      (dec->base.width * dec->base.height) /
+      (BLOCK_WIDTH * BLOCK_HEIGHT);
+
+   dec->zscan_source_format = find_first_supported_format(dec, const_zscan_source_formats,
+                                                          num_zscan_source_formats, PIPE_TEXTURE_2D);
+   if (dec->zscan_source_format == PIPE_FORMAT_NONE)
+      return false;
+
+   layout = vl_zscan_linear(dec->pipe, dec->blocks_per_line);
+   if (!layout) /* vl_zscan_linear() returns NULL when it cannot build the layout texture */
+      return false;
+
+   num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1;
+
+   ok = vl_zscan_init(&dec->zscan_y, dec->pipe, dec->base.width, dec->base.height,
+                      dec->blocks_per_line, dec->max_blocks, num_channels);
+   if (ok) {
+      vl_zscan_set_layout(&dec->zscan_y, layout);
+      ok = vl_zscan_init(&dec->zscan_c, dec->pipe, dec->chroma_width, dec->chroma_height,
+                         dec->blocks_per_line, dec->max_blocks, num_channels);
+   }
+   if (ok)
+      vl_zscan_set_layout(&dec->zscan_c, layout);
+   /* NOTE(review): as before, zscan_y stays initialised when zscan_c fails; the caller still has a TODO for that */
+   pipe_sampler_view_reference(&layout, NULL); /* vl_zscan_set_layout() took its own references */
+   return ok;
+}
+
+static bool
+init_idct(struct vl_mpeg12_decoder *dec)
 {
-   unsigned chroma_width, chroma_height;
    struct pipe_sampler_view *matrix, *transpose;
    float matrix_scale, transpose_scale;
 
@@ -645,22 +766,11 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
    } else
       pipe_sampler_view_reference(&transpose, matrix);
 
-   if (!vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height,
+   if (!vl_idct_init(&dec->idct_y, dec->pipe, dec->base.width, dec->base.height,
                      dec->nr_of_idct_render_targets, matrix, transpose))
       goto error_y;
 
-   if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      chroma_width = buffer_width / 2;
-      chroma_height = buffer_height / 2;
-   } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
-      chroma_width = buffer_width;
-      chroma_height = buffer_height / 2;
-   } else {
-      chroma_width = buffer_width;
-      chroma_height = buffer_height;
-   }
-
-   if(!vl_idct_init(&dec->idct_c, dec->pipe, chroma_width, chroma_height,
+   if(!vl_idct_init(&dec->idct_c, dec->pipe, dec->chroma_width, dec->chroma_height,
                     dec->nr_of_idct_render_targets, matrix, transpose))
       goto error_c;
 
@@ -736,8 +846,22 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    if (dec->mc_source_format == PIPE_FORMAT_NONE)
       return NULL;
 
+   if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      dec->chroma_width = dec->base.width / 2;
+      dec->chroma_height = dec->base.height / 2;
+   } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      dec->chroma_width = dec->base.width;
+      dec->chroma_height = dec->base.height / 2;
+   } else {
+      dec->chroma_width = dec->base.width;
+      dec->chroma_height = dec->base.height;
+   }
+
+   if (!init_zscan(dec))
+      return NULL; // TODO error handling
+
    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
-      if (!init_idct(dec, dec->base.width, dec->base.height))
+      if (!init_idct(dec))
          goto error_idct;
       if (dec->mc_source_format == PIPE_FORMAT_R16_SSCALED)
          mc_scale = SCALE_FACTOR_SSCALED;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index c961e433b51..b94f12a9b7a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -30,6 +30,7 @@
 
 #include <pipe/p_video_context.h>
 
+#include "vl_zscan.h"
 #include "vl_idct.h"
 #include "vl_mc.h"
 
@@ -44,9 +45,15 @@ struct vl_mpeg12_decoder
    struct pipe_video_decoder base;
    struct pipe_context *pipe;
 
+   unsigned chroma_width, chroma_height;
+
+   unsigned blocks_per_line;
+   unsigned max_blocks;
+
    const unsigned (*empty_block_mask)[3][2][2];
    unsigned nr_of_idct_render_targets;
 
+   enum pipe_format zscan_source_format;
    enum pipe_format idct_source_format;
    enum pipe_format idct_intermediate_format;
    enum pipe_format mc_source_format;
@@ -57,6 +64,7 @@ struct vl_mpeg12_decoder
    void *ves_ycbcr;
    void *ves_mv;
 
+   struct vl_zscan zscan_y, zscan_c;
    struct vl_idct idct_y, idct_c;
    struct vl_mc mc_y, mc_c;
 
@@ -69,10 +77,12 @@ struct vl_mpeg12_buffer
 
    struct vl_vertex_buffer vertex_stream;
 
+   struct pipe_video_buffer *zscan_source;
    struct pipe_video_buffer *idct_source;
    struct pipe_video_buffer *idct_intermediate;
    struct pipe_video_buffer *mc_source;
 
+   struct vl_zscan_buffer zscan[VL_MAX_PLANES];
    struct vl_idct_buffer idct[VL_MAX_PLANES];
    struct vl_mc_buffer mc[VL_MAX_PLANES];
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index e61425843ff..d2025f76b86 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -301,9 +301,10 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 
 }
 
-void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
-                     unsigned component, unsigned x, unsigned y,
-                     bool intra, enum pipe_mpeg12_dct_type type)
+unsigned
+vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
+                unsigned component, unsigned x, unsigned y,
+                bool intra, enum pipe_mpeg12_dct_type type)
 {
    struct vl_ycbcr_vertex_stream *stream;
 
@@ -316,7 +317,7 @@ void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
    stream->intra = intra;
    stream->field = type == PIPE_MPEG12_DCT_TYPE_FIELD;
 
-   buffer->ycbcr[component].num_instances++;
+   return buffer->ycbcr[component].num_instances++;
 }
 
 unsigned
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 6a83111b4a8..89d455225ad 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -84,9 +84,9 @@ void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
 struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
 
-void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
-                     unsigned component, unsigned x, unsigned y,
-                     bool intra, enum pipe_mpeg12_dct_type type);
+unsigned vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
+                         unsigned component, unsigned x, unsigned y,
+                         bool intra, enum pipe_mpeg12_dct_type type);
 
 struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame);
 
diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
new file mode 100644
index 00000000000..4d4d3fd6d95
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -0,0 +1,492 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+
+#include <pipe/p_screen.h>
+#include <pipe/p_context.h>
+
+#include <util/u_draw.h>
+#include <util/u_sampler.h>
+#include <util/u_inlines.h>
+
+#include <tgsi/tgsi_ureg.h>
+
+#include <vl/vl_defines.h>
+#include <vl/vl_types.h>
+
+#include "vl_zscan.h"
+#include "vl_vertex_buffers.h"
+
+enum VS_OUTPUT
+{
+   VS_O_VPOS, /* semantic index of the position output */
+   VS_O_VTEX  /* first generic output index; channel i uses VS_O_VTEX + i */
+};
+
+static void *
+create_vert_shader(struct vl_zscan *zscan)
+{
+   struct ureg_program *shader;
+
+   struct ureg_src scale, instance;
+   struct ureg_src vrect, vpos;
+
+   struct ureg_dst tmp;
+   struct ureg_dst o_vpos, o_vtex[zscan->num_channels];
+
+   unsigned i;
+   /* Assembles the zscan vertex shader; NULL if ureg_create() fails, else the result of ureg_create_shader_and_destroy(). */
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+   /* size of one block expressed as a fraction of the destination buffer */
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / zscan->buffer_width,
+      (float)BLOCK_HEIGHT / zscan->buffer_height);
+
+   instance = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0);
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   tmp = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i);
+
+   /*
+    * o_vpos.xy = (vpos + vrect) * scale
+    * o_vpos.zw = 1.0f
+    *
+    * tmp.xz = InstanceID / blocks_per_line
+    * tmp.x = frac(tmp.x)
+    * tmp.z = floor(tmp.z)
+    *
+    * o_vtex.x = vrect.x / blocks_per_line + tmp.x
+    * o_vtex.y = vrect.y
+    * o_vtex.z = tmp.z * blocks_per_line / blocks_total
+    */
+   ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+   /* NOTE(review): instance is read unswizzled for both .x and .z — confirm InstanceID is available in .z */
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), instance,
+            ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
+
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp));
+   ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp));
+   /* one texcoord per channel; channel i > 0 adds 1 / (blocks_per_line * BLOCK_WIDTH) to tmp.x on top of channel i - 1 */
+   for (i = 0; i < zscan->num_channels; ++i) {
+      if (i > 0)
+         ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp),
+                  ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH)));
+
+      ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
+               ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
+      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
+      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp),
+               ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
+   }
+
+   ureg_release_temporary(shader, tmp);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, zscan->pipe);
+}
+
+static void *
+create_frag_shader(struct vl_zscan *zscan)
+{
+   struct ureg_program *shader;
+   struct ureg_src vtex[zscan->num_channels];
+
+   struct ureg_src src, scan, quant;
+
+   struct ureg_dst tmp[zscan->num_channels];
+   struct ureg_dst fragment;
+
+   unsigned i;
+   /* Assembles the zscan fragment shader; NULL if ureg_create() fails, else the result of ureg_create_shader_and_destroy(). */
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);
+   /* sampler slots: 0 = source coefficients, 1 = scan layout, 2 = quant (declared but not sampled yet) */
+   src = ureg_DECL_sampler(shader, 0);
+   scan = ureg_DECL_sampler(shader, 1);
+   quant = ureg_DECL_sampler(shader, 2);
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      tmp[i] = ureg_DECL_temporary(shader);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * tmp.x = tex(vtex, 1)   (lookup in the scan layout)
+    * tmp.y = vtex.z
+    * fragment.<channel i> = tex(tmp, 0) * 1.0f   (quant is not applied yet)
+    */
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan);
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z));
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src);
+
+   // TODO: Fetch quant and use it
+   for (i = 0; i < zscan->num_channels; ++i) /* channel i lands in output component X << i */
+      ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f));
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_release_temporary(shader, tmp[i]);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, zscan->pipe);
+}
+
+static bool
+init_shaders(struct vl_zscan *zscan)
+{
+   assert(zscan);
+   /* Creates zscan->vs and zscan->fs; on failure nothing is left allocated and false is returned. */
+   zscan->vs = create_vert_shader(zscan);
+   if (!zscan->vs)
+      goto error_vs;
+
+   zscan->fs = create_frag_shader(zscan);
+   if (!zscan->fs)
+      goto error_fs;
+
+   return true;
+
+error_fs:
+   zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs); /* the vertex shader was already created */
+
+error_vs:
+   return false;
+}
+
+static void
+cleanup_shaders(struct vl_zscan *zscan)
+{
+   assert(zscan);
+   /* Deletes the two shader objects created by init_shaders(). */
+   zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
+   zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs);
+}
+
+static bool
+init_state(struct vl_zscan *zscan)
+{
+   struct pipe_blend_state blend;
+   struct pipe_rasterizer_state rs_state;
+   struct pipe_sampler_state sampler;
+   unsigned i;
+
+   assert(zscan);
+   /* Creates the rasterizer, blend and three sampler state objects; on failure all of them are released again. */
+   memset(&rs_state, 0, sizeof(rs_state));
+   rs_state.gl_rasterization_rules = false;
+   zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state);
+   if (!zscan->rs_state)
+      goto error_rs_state;
+
+   memset(&blend, 0, sizeof blend);
+
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 0;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   /* Needed to allow color writes to FB, even if blending disabled */
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend);
+   if (!zscan->blend)
+      goto error_blend;
+   /* all three samplers are identical: repeat wrap, nearest filtering, normalized coordinates */
+   for (i = 0; i < 3; ++i) {
+      memset(&sampler, 0, sizeof(sampler));
+      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
+      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+      sampler.compare_func = PIPE_FUNC_ALWAYS;
+      sampler.normalized_coords = 1;
+      zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler);
+      if (!zscan->samplers[i])
+         goto error_samplers;
+   }
+
+   return true;
+
+error_samplers:
+   /* samplers[i] is the one that failed, so only indices [0, i) exist */
+   for (; i > 0; --i)
+      zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i - 1]);
+
+   zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
+
+error_blend:
+   zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
+
+error_rs_state:
+   return false;
+}
+
+static void
+cleanup_state(struct vl_zscan *zscan)
+{
+   unsigned i;
+
+   assert(zscan);
+   /* Deletes the state objects created by init_state(): three samplers, rasterizer and blend. */
+   for (i = 0; i < 3; ++i)
+      zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
+
+   zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
+   zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
+}
+
+struct pipe_sampler_view *
+vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line)
+{
+   const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT;
+   /* Builds the linear scan layout texture and returns a sampler view on it, or NULL on failure. */
+   struct pipe_resource res_tmpl, *res;
+   struct pipe_sampler_view sv_tmpl, *sv;
+   struct pipe_transfer *buf_transfer;
+   unsigned x, y, i, pitch;
+   float *f;
+
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      BLOCK_WIDTH * blocks_per_line,
+      BLOCK_HEIGHT,
+      1
+   };
+
+   assert(pipe && blocks_per_line);
+   /* one R32_FLOAT texel per coefficient: blocks_per_line blocks side by side, BLOCK_HEIGHT rows */
+   memset(&res_tmpl, 0, sizeof(res_tmpl));
+   res_tmpl.target = PIPE_TEXTURE_2D;
+   res_tmpl.format = PIPE_FORMAT_R32_FLOAT;
+   res_tmpl.width0 = BLOCK_WIDTH * blocks_per_line;
+   res_tmpl.height0 = BLOCK_HEIGHT;
+   res_tmpl.depth0 = 1;
+   res_tmpl.array_size = 1;
+   res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
+   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
+
+   res = pipe->screen->resource_create(pipe->screen, &res_tmpl);
+   if (!res)
+      goto error_resource;
+
+   buf_transfer = pipe->get_transfer
+   (
+      pipe, res,
+      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+   if (!buf_transfer)
+      goto error_transfer;
+
+   pitch = buf_transfer->stride / sizeof(float);
+
+   f = pipe->transfer_map(pipe, buf_transfer);
+   if (!f)
+      goto error_map;
+   /* texel (x, y) of block i stores its own linear index within the line, divided by total_size */
+   for (i = 0; i < blocks_per_line; ++i)
+      for (y = 0; y < BLOCK_HEIGHT; ++y)
+         for (x = 0; x < BLOCK_WIDTH; ++x) {
+            float addr = x + y * BLOCK_WIDTH +
+               i * BLOCK_WIDTH * BLOCK_HEIGHT;
+
+            addr /= total_size;
+
+            f[i * BLOCK_WIDTH + y * pitch + x] = addr;
+         }
+
+   pipe->transfer_unmap(pipe, buf_transfer);
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
+   u_sampler_view_default_template(&sv_tmpl, res, res->format);
+   sv = pipe->create_sampler_view(pipe, res, &sv_tmpl);
+   pipe_resource_reference(&res, NULL);
+   if (!sv)
+      goto error_resource; /* transfer and resource reference are already released above */
+
+   return sv;
+
+error_map:
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+error_transfer:
+   pipe_resource_reference(&res, NULL);
+
+error_resource:
+   return NULL;
+}
+
+#if 0
+// TODO
+struct pipe_sampler_view *
+vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);
+
+struct pipe_sampler_view *
+vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
+#endif
+
+bool
+vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
+              unsigned buffer_width, unsigned buffer_height,
+              unsigned blocks_per_line, unsigned blocks_total,
+              unsigned num_channels)
+{
+   assert(zscan && pipe);
+   /* Stores the parameters, then builds shaders and pipe state; returns false with nothing left allocated on failure. */
+   zscan->pipe = pipe;
+   zscan->buffer_width = buffer_width;
+   zscan->buffer_height = buffer_height;
+   zscan->num_channels = num_channels;
+   zscan->blocks_per_line = blocks_per_line;
+   zscan->blocks_total = blocks_total;
+   /* the shaders bake in the values stored above, so they must be built after them */
+   if(!init_shaders(zscan))
+      return false;
+
+   if(!init_state(zscan)) {
+      cleanup_shaders(zscan);
+      return false;
+   }
+
+   return true;
+}
+
+void
+vl_zscan_cleanup(struct vl_zscan *zscan)
+{
+   assert(zscan); /* releases the layout view, the shaders and the pipe state objects of this zscan */
+   pipe_sampler_view_reference(&zscan->scan, NULL); /* drop the reference taken by vl_zscan_set_layout() */
+   cleanup_shaders(zscan);
+   cleanup_state(zscan);
+}
+
+void
+vl_zscan_set_layout(struct vl_zscan *zscan, struct pipe_sampler_view *layout)
+{
+   assert(zscan);
+   assert(layout);
+   /* Makes zscan->scan reference layout; the caller keeps its own reference. */
+   pipe_sampler_view_reference(&zscan->scan, layout);
+}
+
+#if 0
+// TODO
+void
+vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
+#endif
+
+bool
+vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
+                     struct pipe_sampler_view *src, struct pipe_surface *dst)
+{
+   assert(zscan && buffer);
+   /* Binds one source view and one destination surface to a zscan pass; currently always returns true. */
+   memset(buffer, 0, sizeof(struct vl_zscan_buffer));
+
+   buffer->zscan = zscan;
+   /* the buffer takes its own references; scan and quant are whatever the zscan holds at this moment */
+   pipe_sampler_view_reference(&buffer->src, src);
+   pipe_sampler_view_reference(&buffer->scan, zscan->scan);
+   pipe_sampler_view_reference(&buffer->quant, zscan->quant);
+   /* viewport and framebuffer both span the whole destination surface */
+   buffer->viewport.scale[0] = dst->width;
+   buffer->viewport.scale[1] = dst->height;
+   buffer->viewport.scale[2] = 1;
+   buffer->viewport.scale[3] = 1;
+   buffer->viewport.translate[0] = 0;
+   buffer->viewport.translate[1] = 0;
+   buffer->viewport.translate[2] = 0;
+   buffer->viewport.translate[3] = 0;
+
+   buffer->fb_state.width = dst->width;
+   buffer->fb_state.height = dst->height;
+   buffer->fb_state.nr_cbufs = 1;
+   pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);
+
+   return true;
+}
+
+void
+vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer)
+{
+   assert(buffer);
+   /* Drops every reference taken in vl_zscan_init_buffer(). */
+   pipe_sampler_view_reference(&buffer->src, NULL);
+   pipe_sampler_view_reference(&buffer->scan, NULL);
+   pipe_sampler_view_reference(&buffer->quant, NULL);
+   pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL);
+}
+
+void
+vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
+{
+   struct vl_zscan *zscan;
+
+   assert(buffer);
+   /* Binds the zscan state and draws num_instances instanced quads into the buffer's framebuffer. */
+   zscan = buffer->zscan;
+   /* NOTE(review): &buffer->src is passed as an array of 2 views — assumes scan directly follows src in vl_zscan_buffer; confirm */
+   zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state);
+   zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend);
+   zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers);
+   zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state);
+   zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport);
+   zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src);
+   zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs);
+   zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs);
+   util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
+}
diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h
new file mode 100644
index 00000000000..28b990ca83b
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_zscan.h
@@ -0,0 +1,110 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_zscan_h
+#define vl_zscan_h
+
+#include <pipe/p_compiler.h>
+#include <pipe/p_state.h>
+
+/*
+ * shader based zscan and quantization
+ * expects vl_vertex_buffers to be used as a todo list
+ */
+struct vl_zscan
+{
+   struct pipe_context *pipe; /* context vl_zscan_render binds state on and draws with */
+   /* NOTE(review): the five unsigned fields share names with vl_zscan_init() arguments; presumably copied from them */
+   unsigned buffer_width;
+   unsigned buffer_height;
+
+   unsigned num_channels;
+
+   unsigned blocks_per_line;
+   unsigned blocks_total;
+
+   void *rs_state; /* handed to bind_rasterizer_state by vl_zscan_render */
+   void *blend;    /* handed to bind_blend_state by vl_zscan_render */
+
+   void *samplers[3]; /* vl_zscan_render binds the first two as fragment sampler states */
+
+   void *vs, *fs; /* handed to bind_vs_state / bind_fs_state by vl_zscan_render */
+
+   struct pipe_sampler_view *scan;  /* each vl_zscan_buffer takes its own reference on this view */
+   struct pipe_sampler_view *quant; /* each vl_zscan_buffer takes its own reference on this view */
+};
+
+struct vl_zscan_buffer
+{
+   struct vl_zscan *zscan; /* parent; vl_zscan_render takes context and state objects from it */
+
+   struct pipe_viewport_state viewport;    /* scale[0]/[1] = dst width/height, translate all 0 */
+   struct pipe_framebuffer_state fb_state; /* dst-sized, one color buffer: cbufs[0] references dst */
+
+   struct pipe_sampler_view *src, *scan, *quant; /* referenced views, released by vl_zscan_cleanup_buffer */
+   struct pipe_surface *dst; /* NOTE(review): not read or written by the code visible here - confirm it is used */
+};
+
+struct pipe_sampler_view *
+vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line);
+
+#if 0 /* compiled out */
+struct pipe_sampler_view *
+vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);
+
+struct pipe_sampler_view *
+vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
+#endif
+/* NOTE(review): definition not in view; the unsigned arguments share names with struct vl_zscan fields */
+bool
+vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
+              unsigned buffer_width, unsigned buffer_height,
+              unsigned blocks_per_line, unsigned blocks_total,
+              unsigned num_channels);
+/* NOTE(review): presumably installs layout as zscan->scan - confirm against the definition */
+void
+vl_zscan_set_layout(struct vl_zscan *zscan, struct pipe_sampler_view *layout);
+
+void
+vl_zscan_cleanup(struct vl_zscan *zscan);
+
+#if 0 /* compiled out */
+void
+vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
+#endif
+/* references src, dst and zscan's scan/quant views; sizes viewport and framebuffer to dst */
+bool
+vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
+                     struct pipe_sampler_view *src, struct pipe_surface *dst);
+/* drops the buffer's references on src, scan, quant and fb_state.cbufs[0] */
+void
+vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer);
+/* binds the zscan state and the buffer's target, then draws a 4-vertex quad num_instances times */
+void
+vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances);
+
+#endif