From e60a8e4fcf2b4073a5fc2d9ec1de5a6ca6c7b9fe Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Mon, 2 Nov 2009 20:32:58 -0500
Subject: g3dvl: DRM winsys changes.

---
 src/gallium/drivers/nouveau/nouveau_winsys.h       |   6 +
 src/gallium/drivers/nv40/Makefile                  |   3 +-
 src/gallium/drivers/nv40/nv40_miptree.c            |  54 +++
 src/gallium/drivers/nv40/nv40_state.h              |  13 +
 src/gallium/drivers/nv40/nv40_video_context.c      |  43 +++
 src/gallium/drivers/nv40/nv40_video_context.h      |  41 +++
 src/gallium/drivers/softpipe/sp_video_context.c    | 108 ++++--
 src/gallium/drivers/softpipe/sp_video_context.h    |  20 +-
 src/gallium/include/pipe/p_video_context.h         |  20 +-
 src/gallium/include/state_tracker/dri1_api.h       |  12 +-
 src/gallium/include/state_tracker/drm_api.h        |   6 +
 src/gallium/state_trackers/dri/dri_context.c       |  20 +-
 src/gallium/state_trackers/xorg/xvmc/context.c     |  46 +--
 src/gallium/state_trackers/xorg/xvmc/surface.c     |  23 +-
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   4 +-
 .../winsys/drm/nouveau/drm/nouveau_drm_api.c       |  53 ++-
 .../winsys/drm/nouveau/drm/nouveau_drm_api.h       |   2 +
 src/gallium/winsys/g3dvl/nouveau/Makefile          | 148 +++++---
 .../winsys/g3dvl/nouveau/drm_nouveau_winsys.c      | 393 +++++++++++++++++++++
 src/gallium/winsys/g3dvl/vl_winsys.h               |  35 +-
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c         | 103 +++---
 21 files changed, 957 insertions(+), 196 deletions(-)
 create mode 100644 src/gallium/drivers/nv40/nv40_video_context.c
 create mode 100644 src/gallium/drivers/nv40/nv40_video_context.h
 create mode 100644 src/gallium/winsys/g3dvl/nouveau/drm_nouveau_winsys.c

diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index 42c77e5e778..600a86c795d 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -53,6 +53,12 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
 extern struct pipe_context *
 nv40_create(struct pipe_screen *, unsigned pctx_id);
 
+extern struct pipe_video_context *
+nv40_video_create(struct pipe_context *pipe, enum pipe_video_profile profile,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height,
+                  unsigned pvctx_id);
+
 extern struct pipe_screen *
 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
 
diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile
index 0ecae2b4913..09348d98a74 100644
--- a/src/gallium/drivers/nv40/Makefile
+++ b/src/gallium/drivers/nv40/Makefile
@@ -24,6 +24,7 @@ C_SOURCES = \
 	nv40_surface.c \
 	nv40_transfer.c \
 	nv40_vbo.c \
-	nv40_vertprog.c
+	nv40_vertprog.c \
+	nv40_video_context.c
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index 465dd3b0693..3e742007744 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -204,6 +204,58 @@ nv40_miptree_surface_del(struct pipe_surface *ps)
 	FREE(ps);
 }
 
+static struct pipe_video_surface*
+nv40_video_surface_new(struct pipe_screen *screen,
+                       enum pipe_video_chroma_format chroma_format,
+                       unsigned width, unsigned height)
+{
+   struct pipe_texture templ;
+   struct nv40_video_surface *vsfc;
+
+   assert(screen);
+   assert(width && height);
+
+   vsfc = CALLOC_STRUCT(nv40_video_surface);
+   if (vsfc == NULL)
+      return NULL;
+
+   /* Fill in the generic part (surface_format is left unset); one reference is held on return */
+   pipe_reference_init(&vsfc->base.reference, 1);
+   vsfc->base.screen = screen;
+   vsfc->base.chroma_format = chroma_format;
+   vsfc->base.width = width;
+   vsfc->base.height = height;
+
+   memset(&templ, 0, sizeof(templ));
+   templ.target = PIPE_TEXTURE_2D;
+   templ.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+   templ.last_level = 0;
+   /* Size is rounded up to powers of two, which vl_mpeg12_mc_renderer expects with pot_buffers=true */
+   templ.width[0] = util_next_power_of_two(width);
+   templ.height[0] = util_next_power_of_two(height);
+   templ.depth[0] = 1;
+   pf_get_block(templ.format, &templ.block);
+   templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+   vsfc->tex = screen->texture_create(screen, &templ);
+   if (vsfc->tex == NULL) {
+      FREE(vsfc);
+      return NULL;
+   }
+
+   return &vsfc->base;
+}
+
+
+static void
+nv40_video_surface_del(struct pipe_video_surface *vsfc)
+{
+   struct nv40_video_surface *surf = nv40_video_surface(vsfc);
+   /* Drop the reference on the texture first, then free the wrapper itself */
+   pipe_texture_reference(&surf->tex, NULL);
+   FREE(surf);
+}
+
 void
 nv40_screen_init_miptree_functions(struct pipe_screen *pscreen)
 {
@@ -212,5 +264,7 @@ nv40_screen_init_miptree_functions(struct pipe_screen *pscreen)
 	pscreen->texture_destroy = nv40_miptree_destroy;
 	pscreen->get_tex_surface = nv40_miptree_surface_new;
 	pscreen->tex_surface_destroy = nv40_miptree_surface_del;
+	pscreen->video_surface_create = nv40_video_surface_new;
+	pscreen->video_surface_destroy = nv40_video_surface_del;
 }
 
diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h
index 8a9d8c8fdf6..ae32777f754 100644
--- a/src/gallium/drivers/nv40/nv40_state.h
+++ b/src/gallium/drivers/nv40/nv40_state.h
@@ -2,6 +2,7 @@
 #define __NV40_STATE_H__
 
 #include "pipe/p_state.h"
+#include "pipe/p_video_state.h"
 #include "tgsi/tgsi_scan.h"
 
 struct nv40_sampler_state {
@@ -85,4 +86,16 @@ struct nv40_miptree {
 	} level[PIPE_MAX_TEXTURE_LEVELS];
 };
 
+struct nv40_video_surface {
+	struct pipe_video_surface base;	/* kept first: nv40_video_surface() casts a base pointer to this struct */
+	struct pipe_texture *tex;	/* texture created and released by the video surface hooks in nv40_miptree.c */
+};
+
+/* Casts a generic pipe_video_surface pointer to the nv40 wrapper type; no checking is performed */
+static INLINE struct nv40_video_surface*
+nv40_video_surface(struct pipe_video_surface *sfc)
+{
+   return (struct nv40_video_surface*)sfc;
+}
+
 #endif
diff --git a/src/gallium/drivers/nv40/nv40_video_context.c b/src/gallium/drivers/nv40/nv40_video_context.c
new file mode 100644
index 00000000000..e2985e5a9c8
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_video_context.c
@@ -0,0 +1,43 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nv40_video_context.h"
+#include <softpipe/sp_video_context.h>
+
+/* Creates the nv40 video context by forwarding to sp_video_create_ex(); pvctx_id is currently unused */
+struct pipe_video_context *
+nv40_video_create(struct pipe_context *pipe, enum pipe_video_profile profile,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height,
+                  unsigned pvctx_id)
+{
+   assert(pipe);
+   return sp_video_create_ex(pipe, profile, chroma_format, width, height,
+                             VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,         /* bufmode */
+                             VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,   /* eb_handling */
+                             true);                                        /* pot_buffers */
+}
diff --git a/src/gallium/drivers/nv40/nv40_video_context.h b/src/gallium/drivers/nv40/nv40_video_context.h
new file mode 100644
index 00000000000..206a342e35e
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_video_context.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef __NV40_VIDEO_CONTEXT_H__
+#define __NV40_VIDEO_CONTEXT_H__
+
+#include <pipe/p_video_context.h>
+
+struct pipe_context;
+
+struct pipe_video_context*
+nv40_video_create(struct pipe_context *pipe, enum pipe_video_profile profile,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height,
+                  unsigned pvctx_id);
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index cae2d3efc58..f4f34e7a905 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,12 +22,13 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include "sp_video_context.h"
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_rect.h>
 #include "softpipe/sp_winsys.h"
 #include "softpipe/sp_texture.h"
 
@@ -79,18 +80,40 @@ sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
 }
 
 static void
-sp_mpeg12_clear_surface(struct pipe_video_context *vpipe,
-                        unsigned x, unsigned y,
-                        unsigned width, unsigned height,
-                        unsigned value,
-                        struct pipe_surface *surface)
+sp_mpeg12_surface_fill(struct pipe_video_context *vpipe,
+                       struct pipe_surface *dst,
+                       unsigned dstx, unsigned dsty,
+                       unsigned width, unsigned height,
+                       unsigned value)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
 
    assert(vpipe);
-   assert(surface);
+   assert(dst);
 
-   ctx->pipe->surface_fill(ctx->pipe, surface, x, y, width, height, value);
+   if (ctx->pipe->surface_fill)
+      ctx->pipe->surface_fill(ctx->pipe, dst, dstx, dsty, width, height, value);
+   else
+      util_surface_fill(ctx->pipe, dst, dstx, dsty, width, height, value);
+}
+
+static void
+sp_mpeg12_surface_copy(struct pipe_video_context *vpipe,
+                       struct pipe_surface *dst,
+                       unsigned dstx, unsigned dsty,
+                       struct pipe_surface *src,
+                       unsigned srcx, unsigned srcy,
+                       unsigned width, unsigned height)
+{
+   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(dst);
+   assert(src);   /* src is handed to the copy path exactly like dst, so validate it too */
+   if (ctx->pipe->surface_copy)
+      ctx->pipe->surface_copy(ctx->pipe, dst, dstx, dsty, src, srcx, srcy, width, height);
+   else   /* the pipe provides no surface_copy hook: fall back to util_surface_copy() */
+      util_surface_copy(ctx->pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, width, height);
 }
 
 static void
@@ -136,7 +159,8 @@ sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
    pipe_video_surface_reference(&ctx->decode_target, dt);
 }
 
-static void sp_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe, const float *mat)
+static void
+sp_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe, const float *mat)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
 
@@ -181,6 +205,7 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    rast.point_size = 1;
    rast.offset_units = 1;
    rast.offset_scale = 1;
+   rast.gl_rasterization_rules = 1;
    /*rast.sprite_coord_mode[i] = ;*/
    ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
    ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
@@ -223,9 +248,12 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
 }
 
 static struct pipe_video_context *
-sp_mpeg12_create(struct pipe_screen *screen, enum pipe_video_profile profile,
+sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
                  enum pipe_video_chroma_format chroma_format,
-                 unsigned width, unsigned height)
+                 unsigned width, unsigned height,
+                 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
+                 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
+                 bool pot_buffers)
 {
    struct sp_mpeg12_context *ctx;
 
@@ -241,27 +269,20 @@ sp_mpeg12_create(struct pipe_screen *screen, enum pipe_video_profile profile,
    ctx->base.width = width;
    ctx->base.height = height;
 
-   ctx->base.screen = screen;
+   ctx->base.screen = pipe->screen;
    ctx->base.destroy = sp_mpeg12_destroy;
    ctx->base.decode_macroblocks = sp_mpeg12_decode_macroblocks;
-   ctx->base.clear_surface = sp_mpeg12_clear_surface;
    ctx->base.render_picture = sp_mpeg12_render_picture;
+   ctx->base.surface_fill = sp_mpeg12_surface_fill;
+   ctx->base.surface_copy = sp_mpeg12_surface_copy;
    ctx->base.set_decode_target = sp_mpeg12_set_decode_target;
    ctx->base.set_csc_matrix = sp_mpeg12_set_csc_matrix;
 
-   ctx->pipe = softpipe_create(screen);
-   if (!ctx->pipe) {
-      FREE(ctx);
-      return NULL;
-   }
+   ctx->pipe = pipe;
 
-   /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture */
    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
                                    width, height, chroma_format,
-                                   VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
-                                   /* TODO: Use XFER_NONE when implemented */
-                                   VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
-                                   true)) {
+                                   bufmode, eb_handling, pot_buffers)) {
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
@@ -289,15 +310,44 @@ struct pipe_video_context *
 sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                 enum pipe_video_chroma_format chroma_format,
                 unsigned width, unsigned height)
+{
+   struct pipe_context *pipe;
+
+   assert(screen);
+   assert(width && height);
+
+   pipe = softpipe_create(screen);   /* softpipe context handed to the video context below */
+   if (!pipe)
+      return NULL;
+
+   /* TODO: Use slice buffering (no advantage to buffering an entire picture with softpipe) and XFER_NONE when implemented */
+   /* NOTE(review): pipe is not released here if sp_video_create_ex() returns NULL -- confirm who owns it on failure */
+   return sp_video_create_ex(pipe, profile,
+                             chroma_format,
+                             width, height,
+                             VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
+                             VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
+                             true);
+}
+
+struct pipe_video_context *
+sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
+                   enum pipe_video_chroma_format chroma_format,
+                   unsigned width, unsigned height,
+                   enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
+                   enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
+                   bool pot_buffers)
 {
-   assert(screen);
+   assert(pipe);   /* this entry point receives a pipe_context; no 'screen' exists in this scope */
    assert(width && height);
 
    switch (u_reduce_video_profile(profile)) {
       case PIPE_VIDEO_CODEC_MPEG12:
-         return sp_mpeg12_create(screen, profile,
+         return sp_mpeg12_create(pipe, profile,
                                  chroma_format,
-                                 width, height);
+                                 width, height,
+                                 bufmode, eb_handling,
+                                 pot_buffers);
       default:
          return NULL;
    }
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
index ccbd1ffe4c8..40743ac423c 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.h
+++ b/src/gallium/drivers/softpipe/sp_video_context.h
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #ifndef SP_VIDEO_CONTEXT_H
@@ -54,4 +54,14 @@ sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                 enum pipe_video_chroma_format chroma_format,
                 unsigned width, unsigned height);
 
+/* Other drivers can call this function in their pipe_video_context constructors and pass it
+   an accelerated pipe_context along with suitable buffering modes, etc */
+struct pipe_video_context *
+sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
+                   enum pipe_video_chroma_format chroma_format,
+                   unsigned width, unsigned height,
+                   enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
+                   enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
+                   bool pot_buffers);
+
 #endif /* SP_VIDEO_CONTEXT_H */
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 6ae31418fa8..9ae595b224e 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -72,12 +72,6 @@ struct pipe_video_context
                               struct pipe_macroblock *macroblocks,
                               struct pipe_fence_handle **fence);
 
-   void (*clear_surface)(struct pipe_video_context *vpipe,
-                         unsigned x, unsigned y,
-                         unsigned width, unsigned height,
-                         unsigned value,
-                         struct pipe_surface *surface);
-
    void (*render_picture)(struct pipe_video_context     *vpipe,
                           /*struct pipe_surface         *backround,
                           struct pipe_video_rect        *backround_area,*/
@@ -95,6 +89,20 @@ struct pipe_video_context
                           struct pipe_video_rect        *layer_src_areas,
                           struct pipe_video_rect        *layer_dst_areas,*/
                           struct pipe_fence_handle      **fence);
+
+   void (*surface_fill)(struct pipe_video_context *vpipe,
+                        struct pipe_surface *dst,
+                        unsigned dstx, unsigned dsty,
+                        unsigned width, unsigned height,
+                        unsigned value);
+
+   void (*surface_copy)(struct pipe_video_context *vpipe,
+                        struct pipe_surface *dst,
+                        unsigned dstx, unsigned dsty,
+                        struct pipe_surface *src,
+                        unsigned srcx, unsigned srcy,
+                        unsigned width, unsigned height);
+
    /*@}*/
 
    /**
diff --git a/src/gallium/include/state_tracker/dri1_api.h b/src/gallium/include/state_tracker/dri1_api.h
index b173ba3683d..603f5d8af83 100644
--- a/src/gallium/include/state_tracker/dri1_api.h
+++ b/src/gallium/include/state_tracker/dri1_api.h
@@ -29,11 +29,11 @@ struct dri1_api_version
 
 struct dri1_api_lock_funcs
 {
-   void (*lock) (struct pipe_context * pipe);
-   void (*unlock) (struct pipe_context * locked_pipe);
-      boolean(*is_locked) (struct pipe_context * locked_pipe);
-      boolean(*is_lock_lost) (struct pipe_context * locked_pipe);
-   void (*clear_lost_lock) (struct pipe_context * locked_pipe);
+   void (*lock) (void *pipe_priv);   /* all hooks take an opaque private pointer, not a pipe_context */
+   void (*unlock) (void *locked_pipe_priv);
+   boolean (*is_locked) (void *locked_pipe_priv);
+   boolean (*is_lock_lost) (void *locked_pipe_priv);
+   void (*clear_lost_lock) (void *locked_pipe_priv);
 };
 
 struct dri1_api
@@ -46,7 +46,7 @@ struct dri1_api
 
    /*@{ */
 
-   struct pipe_surface *(*front_srf_locked) (struct pipe_context *
+   struct pipe_surface *(*front_srf_locked) (struct pipe_screen *
 					     locked_pipe);
 
    void (*present_locked) (struct pipe_context * locked_pipe,
diff --git a/src/gallium/include/state_tracker/drm_api.h b/src/gallium/include/state_tracker/drm_api.h
index 4d1259e1ee7..ea0b91c5813 100644
--- a/src/gallium/include/state_tracker/drm_api.h
+++ b/src/gallium/include/state_tracker/drm_api.h
@@ -8,6 +8,7 @@ struct pipe_screen;
 struct pipe_winsys;
 struct pipe_buffer;
 struct pipe_context;
+struct pipe_video_context;
 struct pipe_texture;
 
 enum drm_create_screen_mode {
@@ -36,6 +37,11 @@ struct drm_api
 	                                      struct drm_create_screen_arg *arg);
 	struct pipe_context* (*create_context)(struct drm_api *api,
 	                                       struct pipe_screen *screen);
+	struct pipe_video_context* (*create_video_context)(struct drm_api *api,
+	                                                   struct pipe_screen *screen,
+	                                                   enum pipe_video_profile profile,
+	                                                   enum pipe_video_chroma_format chroma_format,
+	                                                   unsigned width, unsigned height);
 	/*@}*/
 
 	/**
diff --git a/src/gallium/state_trackers/dri/dri_context.c b/src/gallium/state_trackers/dri/dri_context.c
index 8819936fcaf..3ddff30f883 100644
--- a/src/gallium/state_trackers/dri/dri_context.c
+++ b/src/gallium/state_trackers/dri/dri_context.c
@@ -175,33 +175,33 @@ dri_make_current(__DRIcontextPrivate * cPriv,
 }
 
 static void
-st_dri_lock(struct pipe_context *pipe)
+st_dri_lock(void *pipe_priv)
 {
-   dri_lock((struct dri_context *)pipe->priv);
+   dri_lock((struct dri_context *)pipe_priv);
 }
 
 static void
-st_dri_unlock(struct pipe_context *pipe)
+st_dri_unlock(void *pipe_priv)
 {
-   dri_unlock((struct dri_context *)pipe->priv);
+   dri_unlock((struct dri_context *)pipe_priv);
 }
 
 static boolean
-st_dri_is_locked(struct pipe_context *pipe)
+st_dri_is_locked(void *pipe_priv)
 {
-   return ((struct dri_context *)pipe->priv)->isLocked;
+   return ((struct dri_context *)pipe_priv)->isLocked;
 }
 
 static boolean
-st_dri_lost_lock(struct pipe_context *pipe)
+st_dri_lost_lock(void *pipe_priv)
 {
-   return ((struct dri_context *)pipe->priv)->wsLostLock;
+   return ((struct dri_context *)pipe_priv)->wsLostLock;
 }
 
 static void
-st_dri_clear_lost_lock(struct pipe_context *pipe)
+st_dri_clear_lost_lock(void *pipe_priv)
 {
-   ((struct dri_context *)pipe->priv)->wsLostLock = FALSE;
+   ((struct dri_context *)pipe_priv)->wsLostLock = FALSE;
 }
 
 struct dri1_api_lock_funcs dri1_lf = {
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index c8a389385a8..56003618ada 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include <assert.h>
@@ -121,7 +121,7 @@ static enum pipe_video_profile ProfileToPipe(int xvmc_profile)
       assert(0);
    if (xvmc_profile & XVMC_MPEG_4)
       assert(0);
-	
+
    assert(0);
 
    return -1;
@@ -152,8 +152,8 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
    int mc_type;
    int surface_flags;
    Status ret;
-   struct pipe_screen *screen;
-   struct pipe_video_context *vpipe;
+   struct vl_screen *vscreen;
+   struct vl_context *vctx;
    XvMCContextPrivate *context_priv;
    float csc[16];
 
@@ -188,18 +188,18 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       return BadAlloc;
 
    /* TODO: Reuse screen if process creates another context */
-   screen = vl_screen_create(dpy, scrn);
+   vscreen = vl_screen_create(dpy, scrn);
 
-   if (!screen) {
+   if (!vscreen) {
       FREE(context_priv);
       return BadAlloc;
    }
 
-   vpipe = vl_video_create(dpy, scrn, screen, ProfileToPipe(mc_type),
-                           FormatToPipe(chroma_format), width, height);
+   vctx = vl_video_create(vscreen, ProfileToPipe(mc_type),
+                          FormatToPipe(chroma_format), width, height);
 
-   if (!vpipe) {
-      screen->destroy(screen);
+   if (!vctx) {
+      vl_screen_destroy(vscreen);
       FREE(context_priv);
       return BadAlloc;
    }
@@ -211,9 +211,9 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       VL_CSC_COLOR_STANDARD_IDENTITY : VL_CSC_COLOR_STANDARD_BT_601,
       NULL, true, csc
    );
-   vpipe->set_csc_matrix(vpipe, csc);
+   vctx->vpipe->set_csc_matrix(vctx->vpipe, csc);
 
-   context_priv->vpipe = vpipe;
+   context_priv->vctx = vctx;
 
    context->context_id = XAllocID(dpy);
    context->surface_type_id = surface_type_id;
@@ -222,7 +222,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
    context->flags = flags;
    context->port = port;
    context->privData = context_priv;
-	
+
    SyncHandle();
 
    return Success;
@@ -230,8 +230,8 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
 
 Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
 {
-   struct pipe_screen *screen;
-   struct pipe_video_context *vpipe;
+   struct vl_screen *vscreen;
+   struct vl_context *vctx;
    XvMCContextPrivate *context_priv;
 
    assert(dpy);
@@ -240,11 +240,11 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
       return XvMCBadContext;
 
    context_priv = context->privData;
-   vpipe = context_priv->vpipe;
+   vctx = context_priv->vctx;
    pipe_surface_reference(&context_priv->backbuffer, NULL);
-   screen = vpipe->screen;
-   vpipe->destroy(vpipe);
-   screen->destroy(screen);
+   vscreen = vctx->vscreen;
+   vl_video_destroy(vctx);
+   vl_screen_destroy(vscreen);
    FREE(context_priv);
    context->privData = NULL;
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index bf9038f356e..e1779eafa27 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -27,6 +27,7 @@
 
 #include <assert.h>
 #include <X11/Xlibint.h>
+#include <vl_winsys.h>
 #include <pipe/p_video_context.h>
 #include <pipe/p_video_state.h>
 #include <pipe/p_state.h>
@@ -83,13 +84,16 @@ static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, int xvmc_
 }
 
 static bool
-CreateOrResizeBackBuffer(struct pipe_video_context *vpipe, unsigned int width, unsigned int height,
+CreateOrResizeBackBuffer(struct vl_context *vctx, unsigned int width, unsigned int height,
                          struct pipe_surface **backbuffer)
 {
+   struct pipe_video_context *vpipe;
    struct pipe_texture template;
    struct pipe_texture *tex;
 
-   assert(vpipe);
+   assert(vctx);
+
+   vpipe = vctx->vpipe;
 
    if (*backbuffer) {
       if ((*backbuffer)->width != width || (*backbuffer)->height != height)
@@ -100,8 +104,7 @@ CreateOrResizeBackBuffer(struct pipe_video_context *vpipe, unsigned int width, u
 
    memset(&template, 0, sizeof(struct pipe_texture));
    template.target = PIPE_TEXTURE_2D;
-   /* XXX: Needs to match the drawable's format? */
-   template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+   template.format = vctx->vscreen->format;
    template.last_level = 0;
    template.width[0] = width;
    template.height[0] = height;
@@ -123,7 +126,7 @@ CreateOrResizeBackBuffer(struct pipe_video_context *vpipe, unsigned int width, u
 
    /* Clear the backbuffer in case the video doesn't cover the whole window */
    /* FIXME: Need to clear every time a frame moves and leaves dirty rects */
-   vpipe->clear_surface(vpipe, 0, 0, width, height, 0, *backbuffer);
+   vpipe->surface_fill(vpipe, *backbuffer, 0, 0, width, height, 0);
 
    return true;
 }
@@ -186,7 +189,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
       return XvMCBadSurface;
 
    context_priv = context->privData;
-   vpipe = context_priv->vpipe;
+   vpipe = context_priv->vctx->vpipe;
 
    surface_priv = CALLOC(1, sizeof(XvMCSurfacePrivate));
    if (!surface_priv)
@@ -266,7 +269,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    assert(!future_surface || future_surface_priv->context == context);
 
    context_priv = context->privData;
-   vpipe = context_priv->vpipe;
+   vpipe = context_priv->vctx->vpipe;
 
    t_vsfc = target_surface_priv->pipe_vsfc;
    p_vsfc = past_surface ? past_surface_priv->pipe_vsfc : NULL;
@@ -345,15 +348,15 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    surface_priv = surface->privData;
    context = surface_priv->context;
    context_priv = context->privData;
-   vpipe = context_priv->vpipe;
+   vpipe = context_priv->vctx->vpipe;
 
-   if (!CreateOrResizeBackBuffer(vpipe, width, height, &context_priv->backbuffer))
+   if (!CreateOrResizeBackBuffer(context_priv->vctx, width, height, &context_priv->backbuffer))
       return BadAlloc;
 
    vpipe->render_picture(vpipe, surface_priv->pipe_vsfc, PictureToPipe(flags), &src_rect,
                          context_priv->backbuffer, &dst_rect, surface_priv->disp_fence);
 
-   vl_video_bind_drawable(vpipe, drawable);
+   vl_video_bind_drawable(context_priv->vctx, drawable);
 	
    vpipe->screen->flush_frontbuffer
    (
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 42337631ca1..5fb994db740 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -34,13 +34,13 @@
 #define BLOCK_SIZE_SAMPLES 64
 #define BLOCK_SIZE_BYTES (BLOCK_SIZE_SAMPLES * 2)
 
-struct pipe_video_context;
+struct vl_context;
 struct pipe_surface;
 struct pipe_fence_handle;
 
 typedef struct
 {
-	struct pipe_video_context *vpipe;
+	struct vl_context *vctx;
 	struct pipe_surface *backbuffer;
 } XvMCContextPrivate;
 
diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
index 317dc44d22f..47e53936749 100644
--- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
+++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
@@ -45,9 +45,9 @@ dri_surface_from_handle(struct drm_api *api, struct pipe_screen *pscreen,
 }
 
 static struct pipe_surface *
-nouveau_dri1_front_surface(struct pipe_context *pipe)
+nouveau_dri1_front_surface(struct pipe_screen *screen)
 {
-	return nouveau_winsys_screen(pipe->screen)->front;
+	return nouveau_winsys_screen(screen)->front;
 }
 
 static struct dri1_api nouveau_dri1_api = {
@@ -190,6 +190,76 @@ nouveau_drm_create_context(struct drm_api *api, struct pipe_screen *pscreen)
 	return nvws->pctx[i];
 }
 
+typedef struct pipe_video_context* (*nouveau_video_create)(struct pipe_context *pipe,
+                                                           enum pipe_video_profile profile,
+                                                           enum pipe_video_chroma_format chroma_format,
+                                                           unsigned width, unsigned height,
+                                                           unsigned pvctx);
+
+/*
+ * Create a video context on pscreen.
+ *
+ * Picks the per-chipset constructor, reserves a slot in nvws->pvctx (reusing
+ * a NULL entry or growing the table by one), creates a pipe context and
+ * passes both to the constructor.
+ *
+ * Returns the new video context, or NULL if the chipset is unsupported, the
+ * slot table cannot be grown, or either creation step fails.
+ */
+static struct pipe_video_context *
+nouveau_drm_create_video_context(struct drm_api *api, struct pipe_screen *pscreen,
+                                 enum pipe_video_profile profile,
+                                 enum pipe_video_chroma_format chroma_format,
+                                 unsigned width, unsigned height)
+{
+	struct nouveau_winsys *nvws = nouveau_winsys_screen(pscreen);
+	unsigned chipset = nouveau_screen(pscreen)->device->chipset;
+	struct pipe_video_context **grown;
+	nouveau_video_create init;
+	struct pipe_context *pipe;
+	unsigned i;
+
+	switch (chipset & 0xf0) {
+	case 0x40:
+	case 0x60:
+		init = nv40_video_create;
+		break;
+	default:
+		debug_printf("%s: unknown chipset nv%02x\n", __func__, chipset);
+		return NULL;
+	}
+
+	/* Find a free slot for a pipe video context, allocate a new one if needed */
+	for (i = 0; i < nvws->nr_pvctx; i++) {
+		if (nvws->pvctx[i] == NULL)
+			break;
+	}
+
+	if (i == nvws->nr_pvctx) {
+		/*
+		 * Grow via a temporary so the old table survives a failed
+		 * realloc, and clear the new entry: realloc does not zero it
+		 * and the search above treats NULL as "free".
+		 */
+		grown = realloc(nvws->pvctx,
+				sizeof(*nvws->pvctx) * (nvws->nr_pvctx + 1));
+		if (!grown)
+			return NULL;
+
+		grown[i] = NULL;
+		nvws->pvctx = grown;
+		nvws->nr_pvctx++;
+	}
+
+	pipe = nouveau_drm_create_context(api, pscreen);
+	if (!pipe)
+		return NULL;
+
+	/* NOTE(review): pipe is not destroyed here if init() returns NULL */
+	nvws->pvctx[i] = init(pipe, profile, chroma_format, width, height, i);
+	return nvws->pvctx[i];
+}
+
 static struct pipe_texture *
 nouveau_drm_pt_from_name(struct drm_api *api, struct pipe_screen *pscreen,
 			 struct pipe_texture *templ, const char *name,
@@ -254,6 +302,7 @@ nouveau_drm_handle_from_pt(struct drm_api *api, struct pipe_screen *pscreen,
 struct drm_api drm_api_hooks = {
 	.create_screen = nouveau_drm_create_screen,
 	.create_context = nouveau_drm_create_context,
+        .create_video_context = nouveau_drm_create_video_context,
 	.texture_from_shared_handle = nouveau_drm_pt_from_name,
 	.shared_handle_from_texture = nouveau_drm_name_from_pt,
 	.local_handle_from_texture = nouveau_drm_handle_from_pt,
diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h
index e61e0e0957a..fa4e821e60c 100644
--- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h
+++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.h
@@ -15,6 +15,8 @@ struct nouveau_winsys {
 
 	unsigned nr_pctx;
 	struct pipe_context **pctx;
+        unsigned nr_pvctx;
+        struct pipe_video_context **pvctx;
 
 	struct pipe_surface *front;
 };
diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile
index 2997f6b79ce..4eba18a0d5c 100644
--- a/src/gallium/winsys/g3dvl/nouveau/Makefile
+++ b/src/gallium/winsys/g3dvl/nouveau/Makefile
@@ -1,50 +1,98 @@
-TARGET		= libnouveau_dri.so
-GALLIUMDIR	= ../../..
-DRMDIR		?= /usr
-DRIDIR		= ../../../../driclient
-
-OBJECTS		= nouveau_screen_vl.o nouveau_context_vl.o nouveau_swapbuffers.o
-
-CFLAGS		+= -g -Wall -Werror=implicit-function-declaration -fPIC	\
-		   -I${GALLIUMDIR}/include				\
-		   -I${GALLIUMDIR}/winsys/g3dvl				\
-		   -I${GALLIUMDIR}/winsys/drm/nouveau			\
-		   -I${DRMDIR}/include					\
-		   -I${DRMDIR}/include/drm				\
-		   -I${DRMDIR}/include/nouveau				\
-		   -I${GALLIUMDIR}/drivers				\
-		   -I${GALLIUMDIR}/auxiliary				\
-		   -I${DRIDIR}/include
-
-LDFLAGS		+= -L${DRMDIR}/lib				\
-		   -L${DRIDIR}/lib				\
-		   -L${GALLIUMDIR}/winsys/drm/nouveau/common	\
-		   -L${GALLIUMDIR}/auxiliary/draw		\
-		   -L${GALLIUMDIR}/auxiliary/tgsi		\
-		   -L${GALLIUMDIR}/auxiliary/translate		\
-		   -L${GALLIUMDIR}/auxiliary/rtasm		\
-		   -L${GALLIUMDIR}/auxiliary/cso_cache		\
-		   -L${GALLIUMDIR}/drivers/nv04			\
-		   -L${GALLIUMDIR}/drivers/nv10			\
-		   -L${GALLIUMDIR}/drivers/nv20			\
-		   -L${GALLIUMDIR}/drivers/nv30			\
-		   -L${GALLIUMDIR}/drivers/nv40			\
-		   -L${GALLIUMDIR}/drivers/nv50
-
-LIBS		+= -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lm
-
-#############################################
-
-.PHONY	= all clean libdriclient
-
-all: ${TARGET}
-
-${TARGET}: ${OBJECTS} libdriclient
-	$(CC) ${LDFLAGS} -shared -o $@ ${OBJECTS} ${LIBS}
-
-libdriclient:
-	cd ${DRIDIR}/src; ${MAKE}
-
-clean:
-	cd ${DRIDIR}/src; ${MAKE} clean
-	rm -rf ${OBJECTS} ${TARGET}
+# This makefile produces a libXvMCg3dvl.so which is
+# based on DRM/DRI
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+XVMC_MAJOR = 1
+XVMC_MINOR = 0
+XVMC_LIB = XvMCg3dvl
+XVMC_LIB_NAME = lib$(XVMC_LIB).so
+XVMC_LIB_DEPS = $(EXTRA_LIB_PATH) -lXvMC -lXv -lX11 -lm
+
+SOURCES = drm_nouveau_winsys.c
+
+DRIVER_INCLUDES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-I) \
+                   -I$(TOP)/src/gallium/winsys/drm/nouveau \
+                   -I$(TOP)/src/driclient/include
+DRIVER_DEFINES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-other)
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \
+	$(TOP)/src/gallium/drivers/nv04/libnv04.a \
+	$(TOP)/src/gallium/drivers/nv10/libnv10.a \
+	$(TOP)/src/gallium/drivers/nv20/libnv20.a \
+	$(TOP)/src/gallium/drivers/nv30/libnv30.a \
+	$(TOP)/src/gallium/drivers/nv40/libnv40.a \
+	$(TOP)/src/gallium/drivers/nv50/libnv50.a \
+	$(TOP)/src/gallium/drivers/nouveau/libnouveau.a
+
+DRI_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs)
+
+###############################################################
+
+INCLUDES = $(DRIVER_INCLUDES) \
+           -I$(TOP)/src/gallium/include \
+           -I$(TOP)/src/gallium/auxiliary \
+           -I$(TOP)/src/gallium/drivers \
+           -I$(TOP)/src/gallium/winsys/g3dvl \
+           -I$(TOP)/src/driclient/include
+
+DEFINES += $(DRIVER_DEFINES) \
+           -DGALLIUM_SOFTPIPE \
+	   -DGALLIUM_TRACE
+
+# XXX: Hack, if we include libxvmctracker.a in LIBS none of the symbols are
+# pulled in by the linker because drm_nouveau_winsys.c doesn't refer to them
+OBJECTS = $(SOURCES:.c=.o) $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o
+
+LIBS = $(PIPE_DRIVERS) \
+       $(TOP)/src/driclient/lib/libdriclient.a \
+       $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+       $(TOP)/src/gallium/auxiliary/vl/libvl.a \
+       $(TOP)/src/gallium/auxiliary/draw/libdraw.a \
+       $(TOP)/src/gallium/auxiliary/tgsi/libtgsi.a \
+       $(TOP)/src/gallium/auxiliary/translate/libtranslate.a \
+       $(TOP)/src/gallium/auxiliary/cso_cache/libcso_cache.a \
+       $(TOP)/src/gallium/auxiliary/rtasm/librtasm.a \
+       $(TOP)/src/gallium/auxiliary/util/libutil.a
+
+.c.o:
+	$(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
+
+.S.o:
+	$(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
+
+.PHONY: default $(TOP)/$(LIB_DIR)/gallium clean
+
+default: depend $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME)
+
+$(TOP)/$(LIB_DIR)/gallium:
+	@mkdir -p $(TOP)/$(LIB_DIR)/gallium
+
+# Make the libXvMCg3dvl.so library
+$(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME): $(OBJECTS) $(LIBS) Makefile
+	$(MKLIB) -o $(XVMC_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+		-major $(XVMC_MAJOR) -minor $(XVMC_MINOR) $(MKLIB_OPTIONS) \
+		-install $(TOP)/$(LIB_DIR)/gallium -id $(INSTALL_LIB_DIR)/lib$(XVMC_LIB).1.dylib \
+		$(XVMC_LIB_DEPS) $(DRI_LIB_DEPS) $(OBJECTS) $(LIBS)
+
+depend: $(SOURCES) Makefile
+	$(RM) depend
+	touch depend
+	$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDES) $(SOURCES)
+
+#install: default
+#	$(INSTALL) -d $(INSTALL_DIR)/include/GL
+#	$(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR)
+#	$(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL
+#	@if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \
+#		$(INSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \
+#	fi
+
+clean: Makefile
+	$(RM) $(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME)
+	$(RM) *.o *~
+	$(RM) depend depend.bak
+
+-include depend
diff --git a/src/gallium/winsys/g3dvl/nouveau/drm_nouveau_winsys.c b/src/gallium/winsys/g3dvl/nouveau/drm_nouveau_winsys.c
new file mode 100644
index 00000000000..257aa0a1201
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/nouveau/drm_nouveau_winsys.c
@@ -0,0 +1,393 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <vl_winsys.h>
+#include <driclient.h>
+#include <state_tracker/dri1_api.h>
+#include <pipe/p_video_context.h>
+#include <pipe/p_state.h>
+#include <util/u_memory.h>
+
+struct vl_dri_screen
+{
+   struct vl_screen base;          /* first member: struct vl_screen pointers are cast to this type */
+   Visual *visual;                 /* default visual of the X screen; passed to driCreateContext() */
+   struct drm_api *api;            /* result of drm_api_create() */
+   dri_screen_t *dri_screen;       /* filled in by driCreateScreen() */
+   dri_framebuffer_t dri_framebuf; /* filled in by driCreateScreen(); its private data becomes arg.ddx_info */
+   struct dri1_api *api_hooks;     /* copied from arg.api after create_screen() */
+};
+
+struct vl_dri_context
+{
+   struct vl_context base;            /* first member: struct vl_context pointers are cast to this type */
+   boolean is_locked;                 /* set in vl_dri_lock(), cleared in vl_dri_unlock() */
+   boolean lost_lock;                 /* set when vl_dri_lock() had to fall back to drmGetLock() */
+   drmLock *lock;                     /* points at the DRI screen's sarea lock (set in vl_video_create) */
+   dri_context_t *dri_context;        /* created by driCreateContext() in vl_video_create */
+   int fd;                            /* copy of the DRI screen's fd */
+   struct pipe_video_context *vpipe;  /* NOTE(review): never assigned in this file; base.vpipe is used instead */
+   dri_drawable_t *drawable;          /* set by vl_video_bind_drawable() */
+};
+
+static void
+vl_dri_lock(void *priv)
+{
+   struct vl_dri_context *vl_dri_ctx = priv;
+   drm_context_t hw_context;
+   char ret = 0;
+
+   assert(priv);
+
+   hw_context = vl_dri_ctx->dri_context->drm_context;
+
+   DRM_CAS(vl_dri_ctx->lock, hw_context, DRM_LOCK_HELD | hw_context, ret);   /* presumably leaves ret non-zero when the swap fails — confirm against xf86drm.h */
+   if (ret) {
+      drmGetLock(vl_dri_ctx->fd, hw_context, 0);   /* return value is ignored */
+      vl_dri_ctx->lost_lock = TRUE;                /* picked up later by vl_dri_update_drawables_locked() */
+   }
+   vl_dri_ctx->is_locked = TRUE;   /* set on both paths, whatever drmGetLock() returned */
+}
+
+static void
+vl_dri_unlock(void *priv)
+{
+   struct vl_dri_context *vl_dri_ctx = priv;
+   drm_context_t hw_context;
+
+   assert(priv);
+
+   hw_context = vl_dri_ctx->dri_context->drm_context;
+
+   vl_dri_ctx->is_locked = FALSE;   /* the flag is cleared before the lock itself is released */
+   DRM_UNLOCK(vl_dri_ctx->fd, vl_dri_ctx->lock, hw_context);
+}
+
+static boolean
+vl_dri_is_locked(void *priv)
+{
+   const struct vl_dri_context *ctx = priv;
+
+   /* Lock callback: report the flag kept by vl_dri_lock()/vl_dri_unlock() */
+   assert(priv);
+   return ctx->is_locked;
+}
+
+static boolean
+vl_dri_lost_lock(void *priv)
+{
+   const struct vl_dri_context *ctx = priv;
+
+   /* Lock callback: report whether vl_dri_lock() flagged the lock as lost */
+   assert(priv);
+   return ctx->lost_lock;
+}
+
+static void
+vl_dri_clear_lost_lock(void *priv)
+{
+   struct vl_dri_context *ctx = priv;
+
+   /* Lock callback: acknowledge a lost lock by resetting the flag */
+   assert(priv);
+   ctx->lost_lock = FALSE;
+}
+
+struct dri1_api_lock_funcs dri1_lf =   /* handed to the driver as arg.lf in vl_screen_create(); has external linkage */
+{
+   .lock = vl_dri_lock,
+   .unlock = vl_dri_unlock,
+   .is_locked = vl_dri_is_locked,
+   .is_lock_lost = vl_dri_lost_lock,
+   .clear_lost_lock = vl_dri_clear_lost_lock
+};
+
+static void
+vl_dri_copy_version(struct dri1_api_version *dst, dri_version_t *src)
+{
+   assert(src);
+   assert(dst);
+   dst->major = src->major;         /* field-by-field copy between two distinct version structs */
+   dst->minor = src->minor;
+   dst->patch_level = src->patch;   /* the patch field is named differently on each side */
+}
+
+static boolean
+vl_dri_intersect_src_bbox(struct drm_clip_rect *dst, int dst_x, int dst_y,
+                          const struct drm_clip_rect *src, const struct drm_clip_rect *bbox)
+{
+   int xy1;
+   int xy2;
+
+   assert(dst);
+   assert(src);
+   assert(bbox);
+
+   xy1 = ((int)src->x1 > (int)bbox->x1 + dst_x) ? src->x1 :   /* larger of src->x1 and bbox->x1 shifted by dst_x */
+      (int)bbox->x1 + dst_x;
+   xy2 = ((int)src->x2 < (int)bbox->x2 + dst_x) ? src->x2 :   /* smaller of src->x2 and bbox->x2 shifted by dst_x */
+      (int)bbox->x2 + dst_x;
+   if (xy1 >= xy2 || xy1 < 0)   /* empty span, or one starting at a negative coordinate */
+      return FALSE;
+
+   dst->x1 = xy1;   /* note: dst's x range is already written even if the y test below fails */
+   dst->x2 = xy2;
+
+   xy1 = ((int)src->y1 > (int)bbox->y1 + dst_y) ? src->y1 :   /* same computation for the y range, shifted by dst_y */
+      (int)bbox->y1 + dst_y;
+   xy2 = ((int)src->y2 < (int)bbox->y2 + dst_y) ? src->y2 :
+      (int)bbox->y2 + dst_y;
+   if (xy1 >= xy2 || xy1 < 0)
+      return FALSE;
+
+   dst->y1 = xy1;
+   dst->y2 = xy2;
+   return TRUE;   /* dst now holds the overlap of src with the shifted bbox */
+}
+
+static void
+vl_clip_copy(struct vl_dri_context *vl_dri_ctx,
+	     struct pipe_surface *dst,
+	     struct pipe_surface *src,
+	     const struct drm_clip_rect *src_bbox)
+{
+   struct pipe_video_context *vpipe;
+   struct drm_clip_rect clip;
+   struct drm_clip_rect *cur;
+   int i;
+
+   assert(vl_dri_ctx);
+   assert(dst);
+   assert(src);
+   assert(src_bbox);
+   assert(vl_dri_ctx->drawable->cliprects);
+   assert(vl_dri_ctx->drawable->num_cliprects > 0);
+
+   vpipe = vl_dri_ctx->base.vpipe;   /* read only after the asserts above have checked vl_dri_ctx */
+   cur = vl_dri_ctx->drawable->cliprects;
+
+   for (i = 0; i < vl_dri_ctx->drawable->num_cliprects; ++i) {   /* one copy per clip rect that overlaps src_bbox */
+      if (vl_dri_intersect_src_bbox(&clip, vl_dri_ctx->drawable->x, vl_dri_ctx->drawable->y, cur++, src_bbox))
+         vpipe->surface_copy
+         (
+            vpipe, dst, clip.x1, clip.y1, src,
+            (int)clip.x1 - vl_dri_ctx->drawable->x,   /* source offset: clip position relative to the drawable origin */
+            (int)clip.y1 - vl_dri_ctx->drawable->y,
+            clip.x2 - clip.x1, clip.y2 - clip.y1
+         );
+   }
+}
+
+static void
+vl_dri_update_drawables_locked(struct vl_dri_context *vl_dri_ctx)
+{
+   struct vl_dri_screen *vl_dri_scrn;
+
+   assert(vl_dri_ctx);
+   if (!vl_dri_ctx->lost_lock)
+      return;
+
+   /* The lock was flagged as lost: clear the flag and revalidate the drawable */
+   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
+   vl_dri_ctx->lost_lock = FALSE;
+   DRI_VALIDATE_DRAWABLE_INFO(vl_dri_scrn->dri_screen, vl_dri_ctx->drawable);
+}
+
+static void
+vl_dri_flush_frontbuffer(struct pipe_screen *screen,
+                         struct pipe_surface *surf, void *context_private)
+{
+   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)context_private;
+   struct vl_dri_screen *vl_dri_scrn;
+   struct drm_clip_rect src_bbox;
+   boolean save_lost_lock = FALSE;
+
+   assert(screen);
+   assert(surf);
+   assert(context_private);
+
+   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
+
+   vl_dri_lock(vl_dri_ctx);
+
+   save_lost_lock = vl_dri_ctx->lost_lock;   /* the call below clears the flag; it is restored before unlocking */
+
+   vl_dri_update_drawables_locked(vl_dri_ctx);
+
+   src_bbox.x1 = 0;   /* source box covers the whole drawable, in drawable-relative coordinates */
+   src_bbox.x2 = vl_dri_ctx->drawable->w;
+   src_bbox.y1 = 0;
+   src_bbox.y2 = vl_dri_ctx->drawable->h;
+
+#if 0 /* disabled: uses names this function does not declare (pipe, bbox, vl_dri_drawable, _api_hooks) */
+   if (vl_dri_scrn->_api_hooks->present_locked)
+      vl_dri_scrn->api_hooks->present_locked(pipe, surf,
+                                             vl_dri_ctx->drawable->cliprects,
+                                             vl_dri_ctx->drawable->num_cliprects,
+                                             vl_dri_ctx->drawable->x, vl_dri_drawable->y,
+                                             &bbox, NULL /*fence*/);
+   else
+#endif
+   if (vl_dri_scrn->api_hooks->front_srf_locked) {
+      struct pipe_surface *front = vl_dri_scrn->api_hooks->front_srf_locked(screen);
+
+      if (front)   /* nothing is copied when the driver reports no front surface */
+         vl_clip_copy(vl_dri_ctx, front, surf, &src_bbox);
+
+      //st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, fence);
+   }
+
+   vl_dri_ctx->lost_lock = save_lost_lock;
+
+   vl_dri_unlock(vl_dri_ctx);
+}
+
+Drawable
+vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable)
+{
+   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)vctx;
+   struct vl_dri_screen *vl_dri_scrn;
+   dri_drawable_t *dri_drawable = NULL;
+   Drawable old_drawable = None;
+
+   assert(vctx);
+
+   if (vl_dri_ctx->drawable)
+      old_drawable = vl_dri_ctx->drawable->x_drawable;   /* previous binding, handed back to the caller */
+
+   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
+   driCreateDrawable(vl_dri_scrn->dri_screen, drawable, &dri_drawable);   /* NOTE(review): return value unchecked; previous drawable is not released here */
+   if (dri_drawable)   /* keep the previous binding rather than store an unset pointer */
+      vl_dri_ctx->drawable = dri_drawable;
+   return old_drawable;
+}
+
+struct vl_screen*
+vl_screen_create(Display *display, int screen)
+{
+   struct vl_dri_screen *vl_dri_scrn;
+   struct dri1_create_screen_arg arg;
+   /* Builds a DRI1-backed vl_screen for the given X screen; returns NULL on failure */
+   assert(display);
+
+   vl_dri_scrn = CALLOC_STRUCT(vl_dri_screen);
+   if (!vl_dri_scrn)
+      return NULL;
+
+   driCreateScreen(display, screen, &vl_dri_scrn->dri_screen, &vl_dri_scrn->dri_framebuf);   /* NOTE(review): result unchecked; dri_screen is dereferenced below */
+   vl_dri_scrn->api = drm_api_create();
+
+   arg.base.mode = DRM_CREATE_DRI1;
+   arg.lf = &dri1_lf;
+   arg.ddx_info = vl_dri_scrn->dri_framebuf.private;
+   arg.ddx_info_size = vl_dri_scrn->dri_framebuf.private_size;
+   arg.sarea = vl_dri_scrn->dri_screen->sarea;
+   vl_dri_copy_version(&arg.ddx_version, &vl_dri_scrn->dri_screen->ddx);
+   vl_dri_copy_version(&arg.dri_version, &vl_dri_scrn->dri_screen->dri);
+   vl_dri_copy_version(&arg.drm_version, &vl_dri_scrn->dri_screen->drm);
+   arg.api = NULL;   /* read back below as api_hooks once create_screen() has run */
+
+   vl_dri_scrn->base.pscreen = vl_dri_scrn->api->create_screen(vl_dri_scrn->api,
+                                                               vl_dri_scrn->dri_screen->fd,
+                                                               &arg.base);
+   if (!vl_dri_scrn->base.pscreen) {
+      driDestroyScreen(vl_dri_scrn->dri_screen);   /* do not leak the DRI screen created above */
+      FREE(vl_dri_scrn);
+      return NULL;
+   }
+
+   vl_dri_scrn->visual = XDefaultVisual(display, screen);
+   vl_dri_scrn->api_hooks = arg.api;
+   vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri_flush_frontbuffer;
+   /* XXX: Safe to call this while unlocked? Also assumes the hook and the front surface are non-NULL. */
+   vl_dri_scrn->base.format = vl_dri_scrn->api_hooks->front_srf_locked(vl_dri_scrn->base.pscreen)->format;
+
+   return &vl_dri_scrn->base;
+}
+
+void vl_screen_destroy(struct vl_screen *vscreen)
+{
+   struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vscreen;
+
+   assert(vscreen);
+
+   vl_dri_scrn->base.pscreen->destroy(vl_dri_scrn->base.pscreen);   /* pipe screen goes first, then the DRI screen */
+   driDestroyScreen(vl_dri_scrn->dri_screen);
+   FREE(vl_dri_scrn);   /* NOTE(review): vl_dri_scrn->api from drm_api_create() is not released here — confirm */
+}
+
+/* Create a video context on vscreen; returns NULL on any failure. */
+struct vl_context*
+vl_video_create(struct vl_screen *vscreen, enum pipe_video_profile profile,
+                enum pipe_video_chroma_format chroma_format,
+                unsigned width, unsigned height)
+{
+   struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vscreen;
+   struct vl_dri_context *vl_dri_ctx;
+
+   /* Check for the driver hook first so this path has nothing to clean up */
+   if (!vl_dri_scrn->api->create_video_context) {
+      debug_printf("[G3DVL] No video support found on %s/%s.\n",
+                   vl_dri_scrn->base.pscreen->get_vendor(vl_dri_scrn->base.pscreen),
+                   vl_dri_scrn->base.pscreen->get_name(vl_dri_scrn->base.pscreen));
+      return NULL;
+   }
+
+   vl_dri_ctx = CALLOC_STRUCT(vl_dri_context);
+   if (!vl_dri_ctx)
+      return NULL;
+
+   /* XXX: Is default visual correct/sufficient here? */
+   driCreateContext(vl_dri_scrn->dri_screen, vl_dri_scrn->visual, &vl_dri_ctx->dri_context);
+
+   vl_dri_ctx->base.vpipe = vl_dri_scrn->api->create_video_context(vl_dri_scrn->api,
+                                                                   vscreen->pscreen,
+                                                                   profile, chroma_format,
+                                                                   width, height);
+
+   if (!vl_dri_ctx->base.vpipe) {
+      FREE(vl_dri_ctx);   /* FIXME: the dri_context created above is not released on this path */
+      return NULL;
+   }
+
+   vl_dri_ctx->base.vpipe->priv = vl_dri_ctx;   /* comes back as context_private in vl_dri_flush_frontbuffer() — presumably via the driver, confirm */
+   vl_dri_ctx->base.vscreen = vscreen;
+   vl_dri_ctx->fd = vl_dri_scrn->dri_screen->fd;
+   vl_dri_ctx->lock = (drmLock*)&vl_dri_scrn->dri_screen->sarea->lock;
+
+   return &vl_dri_ctx->base;
+}
+
+void vl_video_destroy(struct vl_context *vctx)
+{
+   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)vctx;
+
+   assert(vctx);
+
+   vl_dri_ctx->base.vpipe->destroy(vl_dri_ctx->base.vpipe);
+   FREE(vl_dri_ctx);   /* NOTE(review): dri_context and drawable are not released here — confirm who owns them */
+}
diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h
index b4fa0d67a1b..d95e9c58335 100644
--- a/src/gallium/winsys/g3dvl/vl_winsys.h
+++ b/src/gallium/winsys/g3dvl/vl_winsys.h
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #ifndef vl_winsys_h
@@ -35,17 +35,32 @@
 struct pipe_screen;
 struct pipe_video_context;
 
-struct pipe_screen*
+struct vl_screen
+{
+   enum pipe_format format;       /* format the XvMC state tracker uses for its back buffer texture */
+   struct pipe_screen *pscreen;   /* destroyed by vl_screen_destroy() */
+};
+
+struct vl_context
+{
+   struct vl_screen *vscreen;          /* screen given to vl_video_create(); destroyed separately by the caller */
+   struct pipe_video_context *vpipe;   /* destroyed by vl_video_destroy() */
+};
+
+struct vl_screen*
 vl_screen_create(Display *display, int screen);
 
-struct pipe_video_context*
-vl_video_create(Display *display, int screen,
-                struct pipe_screen *p_screen,
+void vl_screen_destroy(struct vl_screen *vscreen);
+
+struct vl_context*
+vl_video_create(struct vl_screen *vscreen,
                 enum pipe_video_profile profile,
                 enum pipe_video_chroma_format chroma_format,
                 unsigned width, unsigned height);
 
+void vl_video_destroy(struct vl_context *vctx);
+
 Drawable
-vl_video_bind_drawable(struct pipe_video_context *vpipe, Drawable drawable);
+vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable);
 
 #endif
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 08067aad64c..2b32f07c9e8 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include <vl_winsys.h>
@@ -36,7 +36,12 @@
 #include <softpipe/sp_video_context.h>
 #include <softpipe/sp_texture.h>
 
-/* pipe_winsys implementation */
+/* TODO: Find a good way to calculate this; for now every visual maps to X8R8G8B8 */
+static enum pipe_format VisualToPipe(Visual *visual)
+{
+   assert(visual);
+   return PIPE_FORMAT_X8R8G8B8_UNORM;
+}
 
 struct xsp_pipe_winsys
 {
@@ -48,9 +53,9 @@ struct xsp_pipe_winsys
 
 struct xsp_context
 {
-   Drawable drawable;
+   struct vl_context base;
 
-   void (*pipe_destroy)(struct pipe_video_context *vpipe);
+   Drawable drawable;
 };
 
 struct xsp_buffer
@@ -218,48 +223,37 @@ static void xsp_destroy(struct pipe_winsys *pws)
    FREE(xsp_winsys);
 }
 
-/* Called through pipe_video_context::destroy() */
-static void xsp_pipe_destroy(struct pipe_video_context *vpipe)
-{
-   struct xsp_context *xsp_context;
-
-   assert(vpipe);
-
-   xsp_context = vpipe->priv;
-
-   /* Call the original destroy */
-   xsp_context->pipe_destroy(vpipe);
-
-   FREE(xsp_context);
-}
-
-/* Show starts here */
-
 Drawable
-vl_video_bind_drawable(struct pipe_video_context *vpipe, Drawable drawable)
+vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable)
 {
-   struct xsp_context *xsp_context;
+   struct xsp_context *xsp_context = (struct xsp_context*)vctx;
    Drawable old_drawable;
 
-   assert(vpipe);
+   assert(vctx);
 
-   xsp_context = vpipe->priv;
    old_drawable = xsp_context->drawable;
    xsp_context->drawable = drawable;
 
    return old_drawable;
 }
 
-struct pipe_screen*
+struct vl_screen*
 vl_screen_create(Display *display, int screen)
 {
+   struct vl_screen *vscreen;
    struct xsp_pipe_winsys *xsp_winsys;
 
    assert(display);
 
+   vscreen = CALLOC_STRUCT(vl_screen);
+   if (!vscreen)
+      return NULL;
+
    xsp_winsys = CALLOC_STRUCT(xsp_pipe_winsys);
-   if (!xsp_winsys)
+   if (!xsp_winsys) {
+      FREE(vscreen);
       return NULL;
+   }
 
    xsp_winsys->base.buffer_create = xsp_buffer_create;
    xsp_winsys->base.user_buffer_create = xsp_user_buffer_create;
@@ -291,17 +285,36 @@ vl_screen_create(Display *display, int screen)
 
    if (!xsp_winsys->fbimage) {
       FREE(xsp_winsys);
+      FREE(vscreen);
       return NULL;
    }
 
    XInitImage(xsp_winsys->fbimage);
 
-   return softpipe_create_screen(&xsp_winsys->base);
+   vscreen->pscreen = softpipe_create_screen(&xsp_winsys->base);
+
+   if (!vscreen->pscreen) {
+      FREE(vscreen);
+      XDestroyImage(xsp_winsys->fbimage);
+      FREE(xsp_winsys);
+      return NULL;
+   }
+
+   vscreen->format = VisualToPipe(XDefaultVisual(display, screen));
+
+   return vscreen;
+}
+
+void vl_screen_destroy(struct vl_screen *vscreen)
+{
+   assert(vscreen);
+
+   vscreen->pscreen->destroy(vscreen->pscreen);   /* NOTE(review): presumably also tears down the xsp winsys through its destroy hook — confirm */
+   FREE(vscreen);
 }
 
-struct pipe_video_context*
-vl_video_create(Display *display, int screen,
-                struct pipe_screen *p_screen,
+struct vl_context*
+vl_video_create(struct vl_screen *vscreen,
                 enum pipe_video_profile profile,
                 enum pipe_video_chroma_format chroma_format,
                 unsigned width, unsigned height)
@@ -309,10 +322,10 @@ vl_video_create(Display *display, int screen,
    struct pipe_video_context *vpipe;
    struct xsp_context *xsp_context;
 
-   assert(p_screen);
+   assert(vscreen);
    assert(width && height);
 
-   vpipe = sp_video_create(p_screen, profile, chroma_format, width, height);
+   vpipe = sp_video_create(vscreen->pscreen, profile, chroma_format, width, height);
    if (!vpipe)
       return NULL;
 
@@ -322,11 +335,17 @@ vl_video_create(Display *display, int screen,
       return NULL;
    }
 
-   /* Override this so we can free our xsp_context when the pipe is freed */
-   xsp_context->pipe_destroy = vpipe->destroy;
-   vpipe->destroy = xsp_pipe_destroy;
-
    vpipe->priv = xsp_context;
+   xsp_context->base.vpipe = vpipe;
+   xsp_context->base.vscreen = vscreen;
+
+   return &xsp_context->base;
+}
+
+void vl_video_destroy(struct vl_context *vctx)
+{
+   assert(vctx);
 
-   return vpipe;
+   vctx->vpipe->destroy(vctx->vpipe);
+   FREE(vctx);
 }
-- 
cgit v1.2.3


From d18bd04dde2aca78afd8cec4d9dc4b2fd172ad38 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Mon, 2 Nov 2009 21:22:14 -0500
Subject: g3dvl: Put misc helpers in u_video.h.

---
 src/gallium/auxiliary/util/u_video.h            | 83 +++++++++++++++++++++++++
 src/gallium/drivers/softpipe/sp_video_context.c |  5 +-
 src/gallium/include/pipe/p_video_state.h        | 44 ++-------------
 3 files changed, 91 insertions(+), 41 deletions(-)
 create mode 100644 src/gallium/auxiliary/util/u_video.h

diff --git a/src/gallium/auxiliary/util/u_video.h b/src/gallium/auxiliary/util/u_video.h
new file mode 100644
index 00000000000..78cceb6bcf2
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_video.h
@@ -0,0 +1,83 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef U_VIDEO_H
+#define U_VIDEO_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <pipe/p_defines.h>
+
+/* u_reduce_video_profile() needs these */
+#include <pipe/p_compiler.h>
+#include <util/u_debug.h>
+
+/**
+ * Map a video profile to the codec family it belongs to.
+ *
+ * \param profile  the profile to classify
+ * \return the codec shared by every profile of that family, or
+ *         PIPE_VIDEO_CODEC_UNKNOWN (after tripping assert(0)) when the
+ *         profile is not one of the cases listed below.
+ */
+static INLINE enum pipe_video_codec
+u_reduce_video_profile(enum pipe_video_profile profile)
+{
+   switch (profile)
+   {
+      case PIPE_VIDEO_PROFILE_MPEG1:
+      case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
+      case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+         return PIPE_VIDEO_CODEC_MPEG12;
+
+      case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
+      case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
+         return PIPE_VIDEO_CODEC_MPEG4;
+
+      case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
+      case PIPE_VIDEO_PROFILE_VC1_MAIN:
+      case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
+         return PIPE_VIDEO_CODEC_VC1;
+
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+         return PIPE_VIDEO_CODEC_MPEG4_AVC;
+
+      default:
+         assert(0);
+         return PIPE_VIDEO_CODEC_UNKNOWN;
+   }
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* U_VIDEO_H */
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index f4f34e7a905..e6c8affae92 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -29,8 +29,9 @@
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
 #include <util/u_rect.h>
-#include "softpipe/sp_winsys.h"
-#include "softpipe/sp_texture.h"
+#include <util/u_video.h>
+#include "sp_winsys.h"
+#include "sp_texture.h"
 
 static void
 sp_mpeg12_destroy(struct pipe_video_context *vpipe)
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 4da26d608cf..9ff6bdaa2b6 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,16 +22,12 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #ifndef PIPE_VIDEO_STATE_H
 #define PIPE_VIDEO_STATE_H
 
-/* u_reduce_video_profile() needs these */
-#include <pipe/p_compiler.h>
-#include <util/u_debug.h>
-
 #include <pipe/p_defines.h>
 #include <pipe/p_format.h>
 #include <pipe/p_refcnt.h>
@@ -65,36 +61,6 @@ struct pipe_video_rect
    unsigned x, y, w, h;
 };
 
-static INLINE enum pipe_video_codec
-u_reduce_video_profile(enum pipe_video_profile profile)
-{
-   switch (profile)
-   {
-      case PIPE_VIDEO_PROFILE_MPEG1:
-      case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
-      case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
-         return PIPE_VIDEO_CODEC_MPEG12;
-
-      case PIPE_VIDEO_PROFILE_MPEG4_SIMPLE:
-      case PIPE_VIDEO_PROFILE_MPEG4_ADVANCED_SIMPLE:
-         return PIPE_VIDEO_CODEC_MPEG4;
-
-      case PIPE_VIDEO_PROFILE_VC1_SIMPLE:
-      case PIPE_VIDEO_PROFILE_VC1_MAIN:
-      case PIPE_VIDEO_PROFILE_VC1_ADVANCED:
-         return PIPE_VIDEO_CODEC_VC1;
-
-      case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
-      case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
-      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
-         return PIPE_VIDEO_CODEC_MPEG4_AVC;
-
-      default:
-         assert(0);
-         return PIPE_VIDEO_CODEC_UNKNOWN;
-   }
-}
-
 enum pipe_mpeg12_picture_type
 {
    PIPE_MPEG12_PICTURE_TYPE_FIELD_TOP,
-- 
cgit v1.2.3


From dcccbfd14e46a4b69d3e89666d57f07589eaef9b Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Mon, 2 Nov 2009 22:03:09 -0500
Subject: g3dvl: Wrap macroblock block buffer in a pipe user buffer.

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  7 ++++++-
 src/gallium/include/pipe/p_video_state.h         |  2 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c   | 13 ++++++++++---
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 6b3614821cc..bc4ab5fb357 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1507,6 +1507,8 @@ static void
 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
                 struct pipe_mpeg12_macroblock *mb)
 {
+   void *blocks;
+
    assert(r);
    assert(mb);
    assert(r->num_macroblocks < r->macroblocks_per_batch);
@@ -1514,7 +1516,10 @@ grab_macroblock(struct vl_mpeg12_mc_renderer *r,
    memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
           sizeof(struct pipe_mpeg12_macroblock));
 
-   grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
+   blocks = pipe_buffer_map(r->pipe->screen, mb->blocks,
+                            PIPE_BUFFER_USAGE_CPU_READ);
+   grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, blocks);
+   pipe_buffer_unmap(r->pipe->screen, mb->blocks);
 
    ++r->num_macroblocks;
 }
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 9ff6bdaa2b6..23f2cb74270 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -108,7 +108,7 @@ struct pipe_mpeg12_macroblock
    enum pipe_mpeg12_dct_type dct_type;
    signed pmv[2][2][2];
    unsigned cbp;
-   void *blocks;
+   struct pipe_buffer *blocks;
 };
 
 #if 0
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index e1779eafa27..24c413bc04d 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -31,6 +31,7 @@
 #include <pipe/p_video_context.h>
 #include <pipe/p_video_state.h>
 #include <pipe/p_state.h>
+#include <pipe/p_inlines.h>
 #include <util/u_memory.h>
 #include "xvmc_private.h"
 
@@ -132,7 +133,8 @@ CreateOrResizeBackBuffer(struct vl_context *vctx, unsigned int width, unsigned i
 }
 
 static void
-MacroBlocksToPipe(const XvMCMacroBlockArray *xvmc_macroblocks,
+MacroBlocksToPipe(struct pipe_screen *screen,
+                  const XvMCMacroBlockArray *xvmc_macroblocks,
                   const XvMCBlockArray *xvmc_blocks,
                   unsigned int first_macroblock,
                   unsigned int num_macroblocks,
@@ -167,7 +169,8 @@ MacroBlocksToPipe(const XvMCMacroBlockArray *xvmc_macroblocks,
                pipe_macroblocks->pmv[j][k][l] = xvmc_mb->PMV[j][k][l];
 
       pipe_macroblocks->cbp = xvmc_mb->coded_block_pattern;
-      pipe_macroblocks->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
+      pipe_macroblocks->blocks = pipe_user_buffer_create(screen, xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES,
+                                                         BLOCK_SIZE_BYTES);
 
       ++pipe_macroblocks;
       ++xvmc_mb;
@@ -232,6 +235,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    XvMCSurfacePrivate *past_surface_priv;
    XvMCSurfacePrivate *future_surface_priv;
    struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
+   unsigned int i;
 
    assert(dpy);
 
@@ -275,13 +279,16 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    p_vsfc = past_surface ? past_surface_priv->pipe_vsfc : NULL;
    f_vsfc = future_surface ? future_surface_priv->pipe_vsfc : NULL;
 
-   MacroBlocksToPipe(macroblocks, blocks, first_macroblock,
+   MacroBlocksToPipe(vpipe->screen, macroblocks, blocks, first_macroblock,
                      num_macroblocks, pipe_macroblocks);
 
    vpipe->set_decode_target(vpipe, t_vsfc);
    vpipe->decode_macroblocks(vpipe, p_vsfc, f_vsfc, num_macroblocks,
                              &pipe_macroblocks->base, target_surface_priv->render_fence);
 
+   for (i = 0; i < num_macroblocks; ++i)
+      vpipe->screen->buffer_destroy(pipe_macroblocks[i].blocks);
+
    return Success;
 }
 
-- 
cgit v1.2.3


From 8ebc795ec4265577fe67185a32d8985debda076a Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Thu, 5 Nov 2009 23:58:29 -0500
Subject: g3dvl: Shared drm winsys bits.

---
 src/driclient/include/driclient.h                  |  97 ----
 src/driclient/include/xf86dri.h                    | 119 ----
 src/driclient/src/XF86dri.c                        | 619 ---------------------
 src/driclient/src/driclient.c                      | 310 -----------
 src/driclient/src/xf86dristr.h                     | 342 ------------
 src/gallium/winsys/Makefile                        |   2 +-
 src/gallium/winsys/g3dvl/Makefile                  |   3 +-
 src/gallium/winsys/g3dvl/dri/Makefile              |  14 +
 src/gallium/winsys/g3dvl/dri/XF86dri.c             | 619 +++++++++++++++++++++
 src/gallium/winsys/g3dvl/dri/dri_winsys.c          | 393 +++++++++++++
 src/gallium/winsys/g3dvl/dri/driclient.c           | 310 +++++++++++
 src/gallium/winsys/g3dvl/dri/driclient.h           |  97 ++++
 src/gallium/winsys/g3dvl/dri/xf86dri.h             | 119 ++++
 src/gallium/winsys/g3dvl/dri/xf86dristr.h          | 342 ++++++++++++
 src/gallium/winsys/g3dvl/drm/Makefile              |  12 +
 src/gallium/winsys/g3dvl/drm/Makefile.template     |  72 +++
 src/gallium/winsys/g3dvl/drm/nouveau/Makefile      |  26 +
 src/gallium/winsys/g3dvl/nouveau/Makefile          |  98 ----
 .../winsys/g3dvl/nouveau/drm_nouveau_winsys.c      | 393 -------------
 .../winsys/g3dvl/nouveau/nouveau_context_vl.c      | 172 ------
 .../winsys/g3dvl/nouveau/nouveau_context_vl.h      |  39 --
 .../winsys/g3dvl/nouveau/nouveau_screen_vl.c       |  88 ---
 .../winsys/g3dvl/nouveau/nouveau_screen_vl.h       |  20 -
 .../winsys/g3dvl/nouveau/nouveau_swapbuffers.c     |  94 ----
 .../winsys/g3dvl/nouveau/nouveau_swapbuffers.h     |  10 -
 25 files changed, 2007 insertions(+), 2403 deletions(-)
 delete mode 100644 src/driclient/include/driclient.h
 delete mode 100644 src/driclient/include/xf86dri.h
 delete mode 100644 src/driclient/src/XF86dri.c
 delete mode 100644 src/driclient/src/driclient.c
 delete mode 100644 src/driclient/src/xf86dristr.h
 create mode 100644 src/gallium/winsys/g3dvl/dri/Makefile
 create mode 100644 src/gallium/winsys/g3dvl/dri/XF86dri.c
 create mode 100644 src/gallium/winsys/g3dvl/dri/dri_winsys.c
 create mode 100644 src/gallium/winsys/g3dvl/dri/driclient.c
 create mode 100644 src/gallium/winsys/g3dvl/dri/driclient.h
 create mode 100644 src/gallium/winsys/g3dvl/dri/xf86dri.h
 create mode 100644 src/gallium/winsys/g3dvl/dri/xf86dristr.h
 create mode 100644 src/gallium/winsys/g3dvl/drm/Makefile
 create mode 100644 src/gallium/winsys/g3dvl/drm/Makefile.template
 create mode 100644 src/gallium/winsys/g3dvl/drm/nouveau/Makefile
 delete mode 100644 src/gallium/winsys/g3dvl/nouveau/Makefile
 delete mode 100644 src/gallium/winsys/g3dvl/nouveau/drm_nouveau_winsys.c
 delete mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c
 delete mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h
 delete mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c
 delete mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h
 delete mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c
 delete mode 100644 src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h

diff --git a/src/driclient/include/driclient.h b/src/driclient/include/driclient.h
deleted file mode 100644
index d3915250392..00000000000
--- a/src/driclient/include/driclient.h
+++ /dev/null
@@ -1,97 +0,0 @@
-#ifndef driclient_h
-#define driclient_h
-
-#include <stdint.h>
-#include <X11/Xlib.h>
-#include <drm_sarea.h>
-#include "xf86dri.h"
-
-/* TODO: Bring in DRI XML options */
-
-typedef struct dri_version
-{
-	int major;
-	int minor;
-	int patch;
-} dri_version_t;
-
-typedef struct dri_screen
-{
-	Display			*display;
-	unsigned int		num;
-	dri_version_t		ddx, dri, drm;
-	int			draw_lock_id;
-	int			fd;
-	drm_sarea_t		*sarea;
-	void			*drawable_hash;
-	void			*private;
-} dri_screen_t;
-
-struct dri_context;
-
-typedef struct dri_drawable
-{
-	drm_drawable_t		drm_drawable;
-	Drawable		x_drawable;
-	unsigned int		sarea_index;
-	unsigned int		*sarea_stamp;
-	unsigned int		last_sarea_stamp;
-	int			x, y, w, h;
-	int			back_x, back_y;
-	int			num_cliprects, num_back_cliprects;
-	drm_clip_rect_t		*cliprects, *back_cliprects;
-	dri_screen_t		*dri_screen;
-	unsigned int		refcount;
-	void			*private;
-} dri_drawable_t;
-
-typedef struct dri_context
-{
-	XID			id;
-	drm_context_t		drm_context;
-	dri_screen_t		*dri_screen;
-	void			*private;
-} dri_context_t;
-
-typedef struct dri_framebuffer
-{
-	drm_handle_t		drm_handle;
-	int			base, size, stride;
-	int			private_size;
-	void			*private;
-} dri_framebuffer_t;
-
-int driCreateScreen(Display *display, int screen, dri_screen_t **dri_screen, dri_framebuffer_t *dri_framebuf);
-int driDestroyScreen(dri_screen_t *dri_screen);
-int driCreateDrawable(dri_screen_t *dri_screen, Drawable drawable, dri_drawable_t **dri_drawable);
-int driUpdateDrawableInfo(dri_drawable_t *dri_drawable);
-int driDestroyDrawable(dri_drawable_t *dri_drawable);
-int driCreateContext(dri_screen_t *dri_screen, Visual *visual, dri_context_t **dri_context);
-int driDestroyContext(dri_context_t *dri_context);
-
-#define DRI_VALIDATE_DRAWABLE_INFO_ONCE(dri_drawable)					\
-do											\
-{											\
-	if (*(dri_drawable->sarea_stamp) != dri_drawable->last_sarea_stamp)		\
-		driUpdateDrawableInfo(dri_drawable);					\
-} while (0)
-
-#define DRI_VALIDATE_DRAWABLE_INFO(dri_screen, dri_drawable)					\
-do												\
-{												\
-	while (*(dri_drawable->sarea_stamp) != dri_drawable->last_sarea_stamp)			\
-	{											\
-		register unsigned int hwContext = dri_screen->sarea->lock.lock &		\
-		~(DRM_LOCK_HELD | DRM_LOCK_CONT);						\
-		DRM_UNLOCK(dri_screen->fd, &dri_screen->sarea->lock, hwContext);		\
-												\
-		DRM_SPINLOCK(&dri_screen->sarea->drawable_lock, dri_screen->draw_lock_id);	\
-		DRI_VALIDATE_DRAWABLE_INFO_ONCE(dri_drawable);					\
-		DRM_SPINUNLOCK(&dri_screen->sarea->drawable_lock, dri_screen->draw_lock_id);	\
-												\
-		DRM_LIGHT_LOCK(dri_screen->fd, &dri_screen->sarea->lock, hwContext);		\
-	}											\
-} while (0)
-
-#endif
-
diff --git a/src/driclient/include/xf86dri.h b/src/driclient/include/xf86dri.h
deleted file mode 100644
index baf80a7a9dd..00000000000
--- a/src/driclient/include/xf86dri.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/**************************************************************************
-
-Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
-Copyright 2000 VA Linux Systems, Inc.
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sub license, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial portions
-of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
-ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/**
- * \file xf86dri.h
- * Protocol numbers and function prototypes for DRI X protocol.
- *
- * \author Kevin E. Martin <martin@valinux.com>
- * \author Jens Owen <jens@tungstengraphics.com>
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- */
-
-#ifndef _XF86DRI_H_
-#define _XF86DRI_H_
-
-#include <X11/Xfuncproto.h>
-#include <xf86drm.h>
-
-#define X_XF86DRIQueryVersion			0
-#define X_XF86DRIQueryDirectRenderingCapable	1
-#define X_XF86DRIOpenConnection			2
-#define X_XF86DRICloseConnection		3
-#define X_XF86DRIGetClientDriverName		4
-#define X_XF86DRICreateContext			5
-#define X_XF86DRIDestroyContext			6
-#define X_XF86DRICreateDrawable			7
-#define X_XF86DRIDestroyDrawable		8
-#define X_XF86DRIGetDrawableInfo		9
-#define X_XF86DRIGetDeviceInfo			10
-#define X_XF86DRIAuthConnection                 11
-#define X_XF86DRIOpenFullScreen                 12   /* Deprecated */
-#define X_XF86DRICloseFullScreen                13   /* Deprecated */
-
-#define XF86DRINumberEvents		0
-
-#define XF86DRIClientNotLocal		0
-#define XF86DRIOperationNotSupported	1
-#define XF86DRINumberErrors		(XF86DRIOperationNotSupported + 1)
-
-#ifndef _XF86DRI_SERVER_
-
-_XFUNCPROTOBEGIN
-
-Bool XF86DRIQueryExtension( Display *dpy, int *event_base, int *error_base );
-
-Bool XF86DRIQueryVersion( Display *dpy, int *majorVersion, int *minorVersion,
-    int *patchVersion );
-
-Bool XF86DRIQueryDirectRenderingCapable( Display *dpy, int screen,
-    Bool *isCapable );
-
-Bool XF86DRIOpenConnection( Display *dpy, int screen, drm_handle_t *hSAREA,
-    char **busIDString );
-
-Bool XF86DRIAuthConnection( Display *dpy, int screen, drm_magic_t magic );
-
-Bool XF86DRICloseConnection( Display *dpy, int screen );
-
-Bool XF86DRIGetClientDriverName( Display *dpy, int screen,
-    int *ddxDriverMajorVersion, int *ddxDriverMinorVersion,
-    int *ddxDriverPatchVersion, char **clientDriverName );
-
-Bool XF86DRICreateContext( Display *dpy, int screen, Visual *visual,
-    XID *ptr_to_returned_context_id, drm_context_t *hHWContext );
-
-Bool XF86DRICreateContextWithConfig( Display *dpy, int screen, int configID,
-    XID *ptr_to_returned_context_id, drm_context_t *hHWContext );
-
-Bool XF86DRIDestroyContext( Display *dpy, int screen,
-    XID context_id );
-
-Bool XF86DRICreateDrawable( Display *dpy, int screen,
-    Drawable drawable, drm_drawable_t *hHWDrawable );
-
-Bool XF86DRIDestroyDrawable( Display *dpy, int screen, 
-    Drawable drawable);
-
-Bool XF86DRIGetDrawableInfo( Display *dpy, int screen, Drawable drawable,
-    unsigned int *index, unsigned int *stamp, 
-    int *X, int *Y, int *W, int *H,
-    int *numClipRects, drm_clip_rect_t ** pClipRects,
-    int *backX, int *backY,
-    int *numBackClipRects, drm_clip_rect_t **pBackClipRects );
-
-Bool XF86DRIGetDeviceInfo( Display *dpy, int screen,
-    drm_handle_t *hFrameBuffer, int *fbOrigin, int *fbSize,
-    int *fbStride, int *devPrivateSize, void **pDevPrivate );
-
-_XFUNCPROTOEND
-
-#endif /* _XF86DRI_SERVER_ */
-
-#endif /* _XF86DRI_H_ */
-
diff --git a/src/driclient/src/XF86dri.c b/src/driclient/src/XF86dri.c
deleted file mode 100644
index 9e359a92384..00000000000
--- a/src/driclient/src/XF86dri.c
+++ /dev/null
@@ -1,619 +0,0 @@
-/**************************************************************************
-
-Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
-Copyright 2000 VA Linux Systems, Inc.
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sub license, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial portions
-of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
-ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- *   Kevin E. Martin <martin@valinux.com>
- *   Jens Owen <jens@tungstengraphics.com>
- *   Rickard E. (Rik) Faith <faith@valinux.com>
- *
- */
-
-/* THIS IS NOT AN X CONSORTIUM STANDARD */
-
-#define NEED_REPLIES
-#include <X11/Xlibint.h>
-#include <X11/extensions/Xext.h>
-#include <X11/extensions/extutil.h>
-#include "xf86dristr.h"
-
-static XExtensionInfo _xf86dri_info_data;
-static XExtensionInfo *xf86dri_info = &_xf86dri_info_data;
-static char xf86dri_extension_name[] = XF86DRINAME;
-
-#define XF86DRICheckExtension(dpy,i,val) \
-  XextCheckExtension (dpy, i, xf86dri_extension_name, val)
-
-/*****************************************************************************
- *                                                                           *
- *			   private utility routines                          *
- *                                                                           *
- *****************************************************************************/
-
-static int close_display(Display *dpy, XExtCodes *extCodes);
-static /* const */ XExtensionHooks xf86dri_extension_hooks = {
-    NULL,				/* create_gc */
-    NULL,				/* copy_gc */
-    NULL,				/* flush_gc */
-    NULL,				/* free_gc */
-    NULL,				/* create_font */
-    NULL,				/* free_font */
-    close_display,			/* close_display */
-    NULL,				/* wire_to_event */
-    NULL,				/* event_to_wire */
-    NULL,				/* error */
-    NULL,				/* error_string */
-};
-
-static XEXT_GENERATE_FIND_DISPLAY (find_display, xf86dri_info, 
-				   xf86dri_extension_name, 
-				   &xf86dri_extension_hooks, 
-				   0, NULL)
-
-static XEXT_GENERATE_CLOSE_DISPLAY (close_display, xf86dri_info)
-
-
-/*****************************************************************************
- *                                                                           *
- *		    public XFree86-DRI Extension routines                    *
- *                                                                           *
- *****************************************************************************/
-
-#if 0
-#include <stdio.h>
-#define TRACE(msg)  fprintf(stderr,"XF86DRI%s\n", msg);
-#else
-#define TRACE(msg)
-#endif
-
-#define PUBLIC
-
-PUBLIC Bool XF86DRIQueryExtension (dpy, event_basep, error_basep)
-    Display *dpy;
-    int *event_basep, *error_basep;
-{
-    XExtDisplayInfo *info = find_display (dpy);
-
-    TRACE("QueryExtension...");
-    if (XextHasExtension(info)) {
-	*event_basep = info->codes->first_event;
-	*error_basep = info->codes->first_error;
-        TRACE("QueryExtension... return True");
-	return True;
-    } else {
-        TRACE("QueryExtension... return False");
-	return False;
-    }
-}
-
-PUBLIC Bool XF86DRIQueryVersion(dpy, majorVersion, minorVersion, patchVersion)
-    Display* dpy;
-    int* majorVersion; 
-    int* minorVersion;
-    int* patchVersion;
-{
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRIQueryVersionReply rep;
-    xXF86DRIQueryVersionReq *req;
-
-    TRACE("QueryVersion...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRIQueryVersion, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRIQueryVersion;
-    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
-	UnlockDisplay(dpy);
-	SyncHandle();
-        TRACE("QueryVersion... return False");
-	return False;
-    }
-    *majorVersion = rep.majorVersion;
-    *minorVersion = rep.minorVersion;
-    *patchVersion = rep.patchVersion;
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("QueryVersion... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRIQueryDirectRenderingCapable(dpy, screen, isCapable)
-    Display* dpy;
-    int screen;
-    Bool* isCapable;
-{
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRIQueryDirectRenderingCapableReply rep;
-    xXF86DRIQueryDirectRenderingCapableReq *req;
-
-    TRACE("QueryDirectRenderingCapable...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRIQueryDirectRenderingCapable, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRIQueryDirectRenderingCapable;
-    req->screen = screen;
-    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
-	UnlockDisplay(dpy);
-	SyncHandle();
-        TRACE("QueryDirectRenderingCapable... return False");
-	return False;
-    }
-    *isCapable = rep.isCapable;
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("QueryDirectRenderingCapable... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRIOpenConnection(dpy, screen, hSAREA, busIdString)
-    Display* dpy;
-    int screen;
-    drm_handle_t * hSAREA;
-    char **busIdString;
-{
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRIOpenConnectionReply rep;
-    xXF86DRIOpenConnectionReq *req;
-
-    TRACE("OpenConnection...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRIOpenConnection, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRIOpenConnection;
-    req->screen = screen;
-    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
-	UnlockDisplay(dpy);
-	SyncHandle();
-        TRACE("OpenConnection... return False");
-	return False;
-    }
-
-    *hSAREA = rep.hSAREALow;
-    if (sizeof(drm_handle_t) == 8) {
-       int shift = 32; /* var to prevent warning on next line */
-       *hSAREA |= ((drm_handle_t) rep.hSAREAHigh) << shift;
-    }
-
-    if (rep.length) {
-        if (!(*busIdString = (char *)Xcalloc(rep.busIdStringLength + 1, 1))) {
-            _XEatData(dpy, ((rep.busIdStringLength+3) & ~3));
-            UnlockDisplay(dpy);
-            SyncHandle();
-            TRACE("OpenConnection... return False");
-            return False;
-        }
-	_XReadPad(dpy, *busIdString, rep.busIdStringLength);
-    } else {
-        *busIdString = NULL;
-    }
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("OpenConnection... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRIAuthConnection(dpy, screen, magic)
-    Display* dpy;
-    int screen;
-    drm_magic_t magic;
-{
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRIAuthConnectionReq *req;
-    xXF86DRIAuthConnectionReply rep;
-
-    TRACE("AuthConnection...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRIAuthConnection, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRIAuthConnection;
-    req->screen = screen;
-    req->magic = magic;
-    rep.authenticated = 0;
-    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse) || !rep.authenticated) {
-	UnlockDisplay(dpy);
-	SyncHandle();
-        TRACE("AuthConnection... return False");
-	return False;
-    }
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("AuthConnection... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRICloseConnection(dpy, screen)
-    Display* dpy;
-    int screen;
-{
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRICloseConnectionReq *req;
-
-    TRACE("CloseConnection...");
-
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRICloseConnection, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRICloseConnection;
-    req->screen = screen;
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("CloseConnection... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRIGetClientDriverName(dpy, screen, ddxDriverMajorVersion, 
-	ddxDriverMinorVersion, ddxDriverPatchVersion, clientDriverName)
-    Display* dpy;
-    int screen;
-    int* ddxDriverMajorVersion;
-    int* ddxDriverMinorVersion;
-    int* ddxDriverPatchVersion;
-    char** clientDriverName;
-{
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRIGetClientDriverNameReply rep;
-    xXF86DRIGetClientDriverNameReq *req;
-
-    TRACE("GetClientDriverName...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRIGetClientDriverName, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRIGetClientDriverName;
-    req->screen = screen;
-    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
-	UnlockDisplay(dpy);
-	SyncHandle();
-        TRACE("GetClientDriverName... return False");
-	return False;
-    }
-
-    *ddxDriverMajorVersion = rep.ddxDriverMajorVersion;
-    *ddxDriverMinorVersion = rep.ddxDriverMinorVersion;
-    *ddxDriverPatchVersion = rep.ddxDriverPatchVersion;
-
-    if (rep.length) {
-        if (!(*clientDriverName = (char *)Xcalloc(rep.clientDriverNameLength + 1, 1))) {
-            _XEatData(dpy, ((rep.clientDriverNameLength+3) & ~3));
-            UnlockDisplay(dpy);
-            SyncHandle();
-            TRACE("GetClientDriverName... return False");
-            return False;
-        }
-	_XReadPad(dpy, *clientDriverName, rep.clientDriverNameLength);
-    } else {
-        *clientDriverName = NULL;
-    }
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("GetClientDriverName... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRICreateContextWithConfig(dpy, screen, configID, context,
-	hHWContext)
-    Display* dpy;
-    int screen;
-    int configID;
-    XID* context;
-    drm_context_t * hHWContext;
-{
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRICreateContextReply rep;
-    xXF86DRICreateContextReq *req;
-
-    TRACE("CreateContext...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRICreateContext, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRICreateContext;
-    req->visual = configID;
-    req->screen = screen;
-    *context = XAllocID(dpy);
-    req->context = *context;
-    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
-	UnlockDisplay(dpy);
-	SyncHandle();
-        TRACE("CreateContext... return False");
-	return False;
-    }
-    *hHWContext = rep.hHWContext;
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("CreateContext... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRICreateContext(dpy, screen, visual, context, hHWContext)
-    Display* dpy;
-    int screen;
-    Visual* visual;
-    XID* context;
-    drm_context_t * hHWContext;
-{
-    return XF86DRICreateContextWithConfig( dpy, screen, visual->visualid,
-					   context, hHWContext );
-}
-
-PUBLIC Bool XF86DRIDestroyContext( Display * ndpy, int screen, 
-    XID context )
-{
-    Display * const dpy = (Display *) ndpy;
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRIDestroyContextReq *req;
-
-    TRACE("DestroyContext...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRIDestroyContext, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRIDestroyContext;
-    req->screen = screen;
-    req->context = context;
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("DestroyContext... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRICreateDrawable( Display * ndpy, int screen, 
-    Drawable drawable, drm_drawable_t * hHWDrawable )
-{
-    Display * const dpy = (Display *) ndpy;
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRICreateDrawableReply rep;
-    xXF86DRICreateDrawableReq *req;
-
-    TRACE("CreateDrawable...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRICreateDrawable, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRICreateDrawable;
-    req->screen = screen;
-    req->drawable = drawable;
-    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
-	UnlockDisplay(dpy);
-	SyncHandle();
-        TRACE("CreateDrawable... return False");
-	return False;
-    }
-    *hHWDrawable = rep.hHWDrawable;
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("CreateDrawable... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRIDestroyDrawable( Display * ndpy, int screen,
-    Drawable drawable )
-{
-    Display * const dpy = (Display *) ndpy;
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRIDestroyDrawableReq *req;
-
-    TRACE("DestroyDrawable...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRIDestroyDrawable, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRIDestroyDrawable;
-    req->screen = screen;
-    req->drawable = drawable;
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("DestroyDrawable... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRIGetDrawableInfo(Display* dpy, int screen, Drawable drawable,
-    unsigned int* index, unsigned int* stamp,
-    int* X, int* Y, int* W, int* H,
-    int* numClipRects, drm_clip_rect_t ** pClipRects,
-    int* backX, int* backY,
-    int* numBackClipRects, drm_clip_rect_t ** pBackClipRects )
-{
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRIGetDrawableInfoReply rep;
-    xXF86DRIGetDrawableInfoReq *req;
-    int total_rects;
-
-    TRACE("GetDrawableInfo...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRIGetDrawableInfo, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRIGetDrawableInfo;
-    req->screen = screen;
-    req->drawable = drawable;
-
-    if (!_XReply(dpy, (xReply *)&rep, 1, xFalse)) 
-    {
-	UnlockDisplay(dpy);
-	SyncHandle();
-        TRACE("GetDrawableInfo... return False");
-	return False;
-    }
-    *index = rep.drawableTableIndex;
-    *stamp = rep.drawableTableStamp;
-    *X = (int)rep.drawableX;
-    *Y = (int)rep.drawableY;
-    *W = (int)rep.drawableWidth;
-    *H = (int)rep.drawableHeight;
-    *numClipRects = rep.numClipRects;
-    total_rects = *numClipRects;
-
-    *backX = rep.backX;
-    *backY = rep.backY;
-    *numBackClipRects = rep.numBackClipRects;
-    total_rects += *numBackClipRects;
-
-#if 0
-    /* Because of the fix in Xserver/GL/dri/xf86dri.c, this check breaks
-     * backwards compatibility (Because of the >> 2 shift) but the fix
-     * enables multi-threaded apps to work.
-     */
-    if (rep.length !=  ((((SIZEOF(xXF86DRIGetDrawableInfoReply) - 
-		       SIZEOF(xGenericReply) + 
-		       total_rects * sizeof(drm_clip_rect_t)) + 3) & ~3) >> 2)) {
-        _XEatData(dpy, rep.length);
-	UnlockDisplay(dpy);
-	SyncHandle();
-        TRACE("GetDrawableInfo... return False");
-        return False;
-    }
-#endif
-
-    if (*numClipRects) {
-       int len = sizeof(drm_clip_rect_t) * (*numClipRects);
-
-       *pClipRects = (drm_clip_rect_t *)Xcalloc(len, 1);
-       if (*pClipRects) 
-	  _XRead(dpy, (char*)*pClipRects, len);
-    } else {
-        *pClipRects = NULL;
-    }
-
-    if (*numBackClipRects) {
-       int len = sizeof(drm_clip_rect_t) * (*numBackClipRects);
-
-       *pBackClipRects = (drm_clip_rect_t *)Xcalloc(len, 1);
-       if (*pBackClipRects) 
-	  _XRead(dpy, (char*)*pBackClipRects, len);
-    } else {
-        *pBackClipRects = NULL;
-    }
-
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("GetDrawableInfo... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRIGetDeviceInfo(dpy, screen, hFrameBuffer, 
-	fbOrigin, fbSize, fbStride, devPrivateSize, pDevPrivate)
-    Display* dpy;
-    int screen;
-    drm_handle_t * hFrameBuffer;
-    int* fbOrigin;
-    int* fbSize;
-    int* fbStride;
-    int* devPrivateSize;
-    void** pDevPrivate;
-{
-    XExtDisplayInfo *info = find_display (dpy);
-    xXF86DRIGetDeviceInfoReply rep;
-    xXF86DRIGetDeviceInfoReq *req;
-
-    TRACE("GetDeviceInfo...");
-    XF86DRICheckExtension (dpy, info, False);
-
-    LockDisplay(dpy);
-    GetReq(XF86DRIGetDeviceInfo, req);
-    req->reqType = info->codes->major_opcode;
-    req->driReqType = X_XF86DRIGetDeviceInfo;
-    req->screen = screen;
-    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
-	UnlockDisplay(dpy);
-	SyncHandle();
-        TRACE("GetDeviceInfo... return False");
-	return False;
-    }
-
-    *hFrameBuffer = rep.hFrameBufferLow;
-    if (sizeof(drm_handle_t) == 8) {
-       int shift = 32; /* var to prevent warning on next line */
-       *hFrameBuffer |= ((drm_handle_t) rep.hFrameBufferHigh) << shift;
-    }
-
-    *fbOrigin = rep.framebufferOrigin;
-    *fbSize = rep.framebufferSize;
-    *fbStride = rep.framebufferStride;
-    *devPrivateSize = rep.devPrivateSize;
-
-    if (rep.length) {
-        if (!(*pDevPrivate = (void *)Xcalloc(rep.devPrivateSize, 1))) {
-            _XEatData(dpy, ((rep.devPrivateSize+3) & ~3));
-            UnlockDisplay(dpy);
-            SyncHandle();
-            TRACE("GetDeviceInfo... return False");
-            return False;
-        }
-	_XRead(dpy, (char*)*pDevPrivate, rep.devPrivateSize);
-    } else {
-        *pDevPrivate = NULL;
-    }
-
-    UnlockDisplay(dpy);
-    SyncHandle();
-    TRACE("GetDeviceInfo... return True");
-    return True;
-}
-
-PUBLIC Bool XF86DRIOpenFullScreen(dpy, screen, drawable)
-    Display* dpy;
-    int screen;
-    Drawable drawable;
-{
-    /* This function and the underlying X protocol are deprecated.
-     */
-    (void) dpy;
-    (void) screen;
-    (void) drawable;
-    return False;
-}
-
-PUBLIC Bool XF86DRICloseFullScreen(dpy, screen, drawable)
-    Display* dpy;
-    int screen;
-    Drawable drawable;
-{
-    /* This function and the underlying X protocol are deprecated.
-     */
-    (void) dpy;
-    (void) screen;
-    (void) drawable;
-    return True;
-}
-
diff --git a/src/driclient/src/driclient.c b/src/driclient/src/driclient.c
deleted file mode 100644
index dc2189afec3..00000000000
--- a/src/driclient/src/driclient.c
+++ /dev/null
@@ -1,310 +0,0 @@
-#include "driclient.h"
-#include <assert.h>
-#include <stdlib.h>
-
-int driCreateScreen(Display *display, int screen, dri_screen_t **dri_screen, dri_framebuffer_t *dri_framebuf)
-{
-	int		evbase, errbase;
-	char		*driver_name;
-	int		newly_opened;
-	drm_magic_t	magic;
-	drmVersionPtr	drm_version;
-	drm_handle_t	sarea_handle;
-	char		*bus_id;
-	dri_screen_t	*dri_scrn;
-
-	assert(display);
-	assert(dri_screen);
-
-	if (!XF86DRIQueryExtension(display, &evbase, &errbase))
-		return 1;
-
-	dri_scrn = calloc(1, sizeof(dri_screen_t));
-
-	if (!dri_scrn)
-		return 1;
-
-	if (!XF86DRIQueryVersion(display, &dri_scrn->dri.major, &dri_scrn->dri.minor, &dri_scrn->dri.patch))
-		goto free_screen;
-
-	dri_scrn->display = display;
-	dri_scrn->num = screen;
-	dri_scrn->draw_lock_id = 1;
-
-	if (!XF86DRIOpenConnection(display, screen, &sarea_handle, &bus_id))
-		goto free_screen;
-
-	dri_scrn->fd = -1;
-	dri_scrn->fd = drmOpenOnce(NULL, bus_id, &newly_opened);
-	XFree(bus_id);
-
-	if (dri_scrn->fd < 0)
-		goto close_connection;
-
-	if (drmGetMagic(dri_scrn->fd, &magic))
-		goto close_drm;
-
-	drm_version = drmGetVersion(dri_scrn->fd);
-
-	if (!drm_version)
-		goto close_drm;
-
-	dri_scrn->drm.major = drm_version->version_major;
-	dri_scrn->drm.minor = drm_version->version_minor;
-	dri_scrn->drm.patch = drm_version->version_patchlevel;
-	drmFreeVersion(drm_version);
-
-	if (!XF86DRIAuthConnection(display, screen, magic))
-		goto close_drm;
-
-	if (!XF86DRIGetClientDriverName
-	(
-		display,
-		screen,
-		&dri_scrn->ddx.major,
-		&dri_scrn->ddx.minor,
-		&dri_scrn->ddx.patch,
-		&driver_name
-	))
-		goto close_drm;
-
-	if (drmMap(dri_scrn->fd, sarea_handle, SAREA_MAX, (drmAddress)&dri_scrn->sarea))
-		goto close_drm;
-
-	dri_scrn->drawable_hash = drmHashCreate();
-
-	if (!dri_scrn->drawable_hash)
-		goto unmap_sarea;
-
-	if (dri_framebuf)
-	{
-		if (!XF86DRIGetDeviceInfo
-		(
-			display,
-			screen, &dri_framebuf->drm_handle,
-			&dri_framebuf->base,
-			&dri_framebuf->size,
-			&dri_framebuf->stride,
-			&dri_framebuf->private_size,
-			&dri_framebuf->private
-		))
-			goto destroy_hash;
-	}
-
-	*dri_screen = dri_scrn;
-
-	return 0;
-
-destroy_hash:
-	drmHashDestroy(dri_scrn->drawable_hash);
-unmap_sarea:
-	drmUnmap(dri_scrn->sarea, SAREA_MAX);
-close_drm:
-	drmCloseOnce(dri_scrn->fd);
-close_connection:
-	XF86DRICloseConnection(display, screen);
-free_screen:
-	free(dri_scrn);
-
-	return 1;
-}
-
-int driDestroyScreen(dri_screen_t *dri_screen)
-{
-	Drawable	draw;
-	dri_drawable_t	*dri_draw;
-
-	assert(dri_screen);
-
-	if (drmHashFirst(dri_screen->drawable_hash, &draw, (void**)&dri_draw))
-	{
-		dri_draw->refcount = 1;
-		driDestroyDrawable(dri_draw);
-
-		while (drmHashNext(dri_screen->drawable_hash, &draw, (void**)&dri_draw))
-		{
-			dri_draw->refcount = 1;
-			driDestroyDrawable(dri_draw);
-		}
-	}
-
-	drmHashDestroy(dri_screen->drawable_hash);
-	drmUnmap(dri_screen->sarea, SAREA_MAX);
-	drmCloseOnce(dri_screen->fd);
-	XF86DRICloseConnection(dri_screen->display, dri_screen->num);
-	free(dri_screen);
-
-	return 0;
-}
-
-int driCreateDrawable(dri_screen_t *dri_screen, Drawable drawable, dri_drawable_t **dri_drawable)
-{
-	int		evbase, errbase;
-	dri_drawable_t	*dri_draw;
-
-	assert(dri_screen);
-	assert(dri_drawable);
-
-	if (!XF86DRIQueryExtension(dri_screen->display, &evbase, &errbase))
-		return 1;
-
-	if (!drmHashLookup(dri_screen->drawable_hash, drawable, (void**)dri_drawable))
-	{
-		/* Found */
-		(*dri_drawable)->refcount++;
-		return 0;
-	}
-
-	dri_draw = calloc(1, sizeof(dri_drawable_t));
-
-	if (!dri_draw)
-		return 1;
-
-	if (!XF86DRICreateDrawable(dri_screen->display, 0, drawable, &dri_draw->drm_drawable))
-	{
-		free(dri_draw);
-		return 1;
-	}
-
-	dri_draw->x_drawable = drawable;
-	dri_draw->sarea_index = 0;
-	dri_draw->sarea_stamp = NULL;
-	dri_draw->last_sarea_stamp = 0;
-	dri_draw->dri_screen = dri_screen;
-	dri_draw->refcount = 1;
-
-	if (drmHashInsert(dri_screen->drawable_hash, drawable, dri_draw))
-	{
-		XF86DRIDestroyDrawable(dri_screen->display, dri_screen->num, drawable);
-		free(dri_draw);
-		return 1;
-	}
-
-	if (!dri_draw->sarea_stamp || *dri_draw->sarea_stamp != dri_draw->last_sarea_stamp)
-	{
-		DRM_SPINLOCK(&dri_screen->sarea->drawable_lock, dri_screen->draw_lock_id);
-
-		if (driUpdateDrawableInfo(dri_draw))
-		{
-			XF86DRIDestroyDrawable(dri_screen->display, dri_screen->num, drawable);
-			free(dri_draw);
-			DRM_SPINUNLOCK(&dri_screen->sarea->drawable_lock, dri_screen->draw_lock_id);
-			return 1;
-		}
-
-		DRM_SPINUNLOCK(&dri_screen->sarea->drawable_lock, dri_screen->draw_lock_id);
-	}
-
-	*dri_drawable = dri_draw;
-
-	return 0;
-}
-
-int driUpdateDrawableInfo(dri_drawable_t *dri_drawable)
-{
-	assert(dri_drawable);
-
-	if (dri_drawable->cliprects)
-	{
-		XFree(dri_drawable->cliprects);
-		dri_drawable->cliprects = NULL;
-	}
-	if (dri_drawable->back_cliprects)
-	{
-		XFree(dri_drawable->back_cliprects);
-		dri_drawable->back_cliprects = NULL;
-	}
-
-	DRM_SPINUNLOCK(&dri_drawable->dri_screen->sarea->drawable_lock, dri_drawable->dri_screen->draw_lock_id);
-
-	if (!XF86DRIGetDrawableInfo
-	(
-		dri_drawable->dri_screen->display,
-		dri_drawable->dri_screen->num,
-		dri_drawable->x_drawable,
-    		&dri_drawable->sarea_index,
-    		&dri_drawable->last_sarea_stamp,
-    		&dri_drawable->x,
-    		&dri_drawable->y,
-    		&dri_drawable->w,
-    		&dri_drawable->h,
-		&dri_drawable->num_cliprects,
-		&dri_drawable->cliprects,
-		&dri_drawable->back_x,
-		&dri_drawable->back_y,
-		&dri_drawable->num_back_cliprects,
-		&dri_drawable->back_cliprects
-	))
-	{
-		dri_drawable->sarea_stamp = &dri_drawable->last_sarea_stamp;
-		dri_drawable->num_cliprects = 0;
-		dri_drawable->cliprects = NULL;
-		dri_drawable->num_back_cliprects = 0;
-		dri_drawable->back_cliprects = 0;
-
-		return 1;
-	}
-	else
-		dri_drawable->sarea_stamp = &dri_drawable->dri_screen->sarea->drawableTable[dri_drawable->sarea_index].stamp;
-
-	DRM_SPINLOCK(&dri_drawable->dri_screen->sarea->drawable_lock, dri_drawable->dri_screen->draw_lock_id);
-
-	return 0;
-}
-
-int driDestroyDrawable(dri_drawable_t *dri_drawable)
-{
-	assert(dri_drawable);
-
-	if (--dri_drawable->refcount == 0)
-	{
-		if (dri_drawable->cliprects)
-			XFree(dri_drawable->cliprects);
-		if (dri_drawable->back_cliprects)
-			XFree(dri_drawable->back_cliprects);
-		drmHashDelete(dri_drawable->dri_screen->drawable_hash, dri_drawable->x_drawable);
-		XF86DRIDestroyDrawable(dri_drawable->dri_screen->display, dri_drawable->dri_screen->num, dri_drawable->x_drawable);
-		free(dri_drawable);
-	}
-
-	return 0;
-}
-
-int driCreateContext(dri_screen_t *dri_screen, Visual *visual, dri_context_t **dri_context)
-{
-	int		evbase, errbase;
-	dri_context_t	*dri_ctx;
-
-	assert(dri_screen);
-	assert(visual);
-	assert(dri_context);
-
-	if (!XF86DRIQueryExtension(dri_screen->display, &evbase, &errbase))
-		return 1;
-
-	dri_ctx = calloc(1, sizeof(dri_context_t));
-
-	if (!dri_ctx)
-		return 1;
-
-	if (!XF86DRICreateContext(dri_screen->display, dri_screen->num, visual, &dri_ctx->id, &dri_ctx->drm_context))
-	{
-		free(dri_ctx);
-		return 1;
-	}
-
-	dri_ctx->dri_screen = dri_screen;
-	*dri_context = dri_ctx;
-
-	return 0;
-}
-
-int driDestroyContext(dri_context_t *dri_context)
-{
-	assert(dri_context);
-
-	XF86DRIDestroyContext(dri_context->dri_screen->display, dri_context->dri_screen->num, dri_context->id);
-	free(dri_context);
-
-	return 0;
-}
diff --git a/src/driclient/src/xf86dristr.h b/src/driclient/src/xf86dristr.h
deleted file mode 100644
index b834bd1a1a0..00000000000
--- a/src/driclient/src/xf86dristr.h
+++ /dev/null
@@ -1,342 +0,0 @@
-/**************************************************************************
-
-Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
-Copyright 2000 VA Linux Systems, Inc.
-All Rights Reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a
-copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sub license, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice (including the
-next paragraph) shall be included in all copies or substantial portions
-of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
-ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-**************************************************************************/
-
-/*
- * Authors:
- *   Kevin E. Martin <martin@valinux.com>
- *   Jens Owen <jens@tungstengraphics.com>
- *   Rickard E. (Rik) Fiath <faith@valinux.com>
- *
- */
-
-#ifndef _XF86DRISTR_H_
-#define _XF86DRISTR_H_
-
-#include "xf86dri.h"
-
-#define XF86DRINAME "XFree86-DRI"
-
-/* The DRI version number.  This was originally set to be the same of the
- * XFree86 version number.  However, this version is really indepedent of
- * the XFree86 version.
- *
- * Version History:
- *    4.0.0: Original
- *    4.0.1: Patch to bump clipstamp when windows are destroyed, 28 May 02
- *    4.1.0: Add transition from single to multi in DRMInfo rec, 24 Jun 02
- */
-#define XF86DRI_MAJOR_VERSION	4
-#define XF86DRI_MINOR_VERSION	1
-#define XF86DRI_PATCH_VERSION	0
-
-typedef struct _XF86DRIQueryVersion {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRIQueryVersion */
-    CARD16	length B16;
-} xXF86DRIQueryVersionReq;
-#define sz_xXF86DRIQueryVersionReq	4
-
-typedef struct {
-    BYTE	type;			/* X_Reply */
-    BOOL	pad1;
-    CARD16	sequenceNumber B16;
-    CARD32	length B32;
-    CARD16	majorVersion B16;	/* major version of DRI protocol */
-    CARD16	minorVersion B16;	/* minor version of DRI protocol */
-    CARD32	patchVersion B32;       /* patch version of DRI protocol */
-    CARD32	pad3 B32;
-    CARD32	pad4 B32;
-    CARD32	pad5 B32;
-    CARD32	pad6 B32;
-} xXF86DRIQueryVersionReply;
-#define sz_xXF86DRIQueryVersionReply	32
-
-typedef struct _XF86DRIQueryDirectRenderingCapable {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* X_DRIQueryDirectRenderingCapable */
-    CARD16	length B16;
-    CARD32	screen B32;
-} xXF86DRIQueryDirectRenderingCapableReq;
-#define sz_xXF86DRIQueryDirectRenderingCapableReq	8
-
-typedef struct {
-    BYTE	type;			/* X_Reply */
-    BOOL	pad1;
-    CARD16	sequenceNumber B16;
-    CARD32	length B32;
-    BOOL	isCapable;
-    BOOL	pad2;
-    BOOL	pad3;
-    BOOL	pad4;
-    CARD32	pad5 B32;
-    CARD32	pad6 B32;
-    CARD32	pad7 B32;
-    CARD32	pad8 B32;
-    CARD32	pad9 B32;
-} xXF86DRIQueryDirectRenderingCapableReply;
-#define sz_xXF86DRIQueryDirectRenderingCapableReply	32
-
-typedef struct _XF86DRIOpenConnection {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRIOpenConnection */
-    CARD16	length B16;
-    CARD32	screen B32;
-} xXF86DRIOpenConnectionReq;
-#define sz_xXF86DRIOpenConnectionReq	8
-
-typedef struct {
-    BYTE	type;			/* X_Reply */
-    BOOL	pad1;
-    CARD16	sequenceNumber B16;
-    CARD32	length B32;
-    CARD32	hSAREALow B32;
-    CARD32	hSAREAHigh B32;
-    CARD32	busIdStringLength B32;
-    CARD32	pad6 B32;
-    CARD32	pad7 B32;
-    CARD32	pad8 B32;
-} xXF86DRIOpenConnectionReply;
-#define sz_xXF86DRIOpenConnectionReply	32
-
-typedef struct _XF86DRIAuthConnection {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRICloseConnection */
-    CARD16	length B16;
-    CARD32	screen B32;
-    CARD32      magic B32;
-} xXF86DRIAuthConnectionReq;
-#define sz_xXF86DRIAuthConnectionReq	12
-
-typedef struct {
-    BYTE        type;
-    BOOL        pad1;
-    CARD16      sequenceNumber B16;
-    CARD32      length B32;
-    CARD32      authenticated B32;
-    CARD32      pad2 B32;
-    CARD32      pad3 B32;
-    CARD32      pad4 B32;
-    CARD32      pad5 B32;
-    CARD32      pad6 B32;
-} xXF86DRIAuthConnectionReply;
-#define zx_xXF86DRIAuthConnectionReply  32
-
-typedef struct _XF86DRICloseConnection {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRICloseConnection */
-    CARD16	length B16;
-    CARD32	screen B32;
-} xXF86DRICloseConnectionReq;
-#define sz_xXF86DRICloseConnectionReq	8
-
-typedef struct _XF86DRIGetClientDriverName {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRIGetClientDriverName */
-    CARD16	length B16;
-    CARD32	screen B32;
-} xXF86DRIGetClientDriverNameReq;
-#define sz_xXF86DRIGetClientDriverNameReq	8
-
-typedef struct {
-    BYTE	type;			/* X_Reply */
-    BOOL	pad1;
-    CARD16	sequenceNumber B16;
-    CARD32	length B32;
-    CARD32	ddxDriverMajorVersion B32;
-    CARD32	ddxDriverMinorVersion B32;
-    CARD32	ddxDriverPatchVersion B32;
-    CARD32	clientDriverNameLength B32;
-    CARD32	pad5 B32;
-    CARD32	pad6 B32;
-} xXF86DRIGetClientDriverNameReply;
-#define sz_xXF86DRIGetClientDriverNameReply	32
-
-typedef struct _XF86DRICreateContext {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRICreateContext */
-    CARD16	length B16;
-    CARD32	screen B32;
-    CARD32	visual B32;
-    CARD32	context B32;
-} xXF86DRICreateContextReq;
-#define sz_xXF86DRICreateContextReq	16
-
-typedef struct {
-    BYTE	type;			/* X_Reply */
-    BOOL	pad1;
-    CARD16	sequenceNumber B16;
-    CARD32	length B32;
-    CARD32	hHWContext B32;
-    CARD32	pad2 B32;
-    CARD32	pad3 B32;
-    CARD32	pad4 B32;
-    CARD32	pad5 B32;
-    CARD32	pad6 B32;
-} xXF86DRICreateContextReply;
-#define sz_xXF86DRICreateContextReply	32
-
-typedef struct _XF86DRIDestroyContext {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRIDestroyContext */
-    CARD16	length B16;
-    CARD32	screen B32;
-    CARD32	context B32;
-} xXF86DRIDestroyContextReq;
-#define sz_xXF86DRIDestroyContextReq	12
-
-typedef struct _XF86DRICreateDrawable {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRICreateDrawable */
-    CARD16	length B16;
-    CARD32	screen B32;
-    CARD32	drawable B32;
-} xXF86DRICreateDrawableReq;
-#define sz_xXF86DRICreateDrawableReq	12
-
-typedef struct {
-    BYTE	type;			/* X_Reply */
-    BOOL	pad1;
-    CARD16	sequenceNumber B16;
-    CARD32	length B32;
-    CARD32	hHWDrawable B32;
-    CARD32	pad2 B32;
-    CARD32	pad3 B32;
-    CARD32	pad4 B32;
-    CARD32	pad5 B32;
-    CARD32	pad6 B32;
-} xXF86DRICreateDrawableReply;
-#define sz_xXF86DRICreateDrawableReply	32
-
-typedef struct _XF86DRIDestroyDrawable {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRIDestroyDrawable */
-    CARD16	length B16;
-    CARD32	screen B32;
-    CARD32	drawable B32;
-} xXF86DRIDestroyDrawableReq;
-#define sz_xXF86DRIDestroyDrawableReq	12
-
-typedef struct _XF86DRIGetDrawableInfo {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRIGetDrawableInfo */
-    CARD16	length B16;
-    CARD32	screen B32;
-    CARD32	drawable B32;
-} xXF86DRIGetDrawableInfoReq;
-#define sz_xXF86DRIGetDrawableInfoReq	12
-
-typedef struct {
-    BYTE	type;			/* X_Reply */
-    BOOL	pad1;
-    CARD16	sequenceNumber B16;
-    CARD32	length B32;
-    CARD32	drawableTableIndex B32;
-    CARD32	drawableTableStamp B32;
-    INT16	drawableX B16;
-    INT16	drawableY B16;
-    INT16	drawableWidth B16;
-    INT16	drawableHeight B16;
-    CARD32	numClipRects B32;
-    INT16       backX B16;
-    INT16       backY B16;
-    CARD32      numBackClipRects B32;
-} xXF86DRIGetDrawableInfoReply;
-
-#define sz_xXF86DRIGetDrawableInfoReply	36
-
-
-typedef struct _XF86DRIGetDeviceInfo {
-    CARD8	reqType;		/* always DRIReqCode */
-    CARD8	driReqType;		/* always X_DRIGetDeviceInfo */
-    CARD16	length B16;
-    CARD32	screen B32;
-} xXF86DRIGetDeviceInfoReq;
-#define sz_xXF86DRIGetDeviceInfoReq	8
-
-typedef struct {
-    BYTE	type;			/* X_Reply */
-    BOOL	pad1;
-    CARD16	sequenceNumber B16;
-    CARD32	length B32;
-    CARD32	hFrameBufferLow B32;
-    CARD32	hFrameBufferHigh B32;
-    CARD32	framebufferOrigin B32;
-    CARD32	framebufferSize B32;
-    CARD32	framebufferStride B32;
-    CARD32	devPrivateSize B32;
-} xXF86DRIGetDeviceInfoReply;
-#define sz_xXF86DRIGetDeviceInfoReply	32
-
-typedef struct _XF86DRIOpenFullScreen {
-    CARD8       reqType;	/* always DRIReqCode */
-    CARD8       driReqType;	/* always X_DRIOpenFullScreen */
-    CARD16      length B16;
-    CARD32      screen B32;
-    CARD32      drawable B32;
-} xXF86DRIOpenFullScreenReq;
-#define sz_xXF86DRIOpenFullScreenReq    12
-
-typedef struct {
-    BYTE        type;
-    BOOL        pad1;
-    CARD16      sequenceNumber B16;
-    CARD32      length B32;
-    CARD32      isFullScreen B32;
-    CARD32      pad2 B32;
-    CARD32      pad3 B32;
-    CARD32      pad4 B32;
-    CARD32      pad5 B32;
-    CARD32      pad6 B32;
-} xXF86DRIOpenFullScreenReply;
-#define sz_xXF86DRIOpenFullScreenReply  32
-
-typedef struct _XF86DRICloseFullScreen {
-    CARD8       reqType;	/* always DRIReqCode */
-    CARD8       driReqType;	/* always X_DRICloseFullScreen */
-    CARD16      length B16;
-    CARD32      screen B32;
-    CARD32      drawable B32;
-} xXF86DRICloseFullScreenReq;
-#define sz_xXF86DRICloseFullScreenReq   12
-
-typedef struct {
-    BYTE        type;
-    BOOL        pad1;
-    CARD16      sequenceNumber B16;
-    CARD32      length B32;
-    CARD32      pad2 B32;
-    CARD32      pad3 B32;
-    CARD32      pad4 B32;
-    CARD32      pad5 B32;
-    CARD32      pad6 B32;
-    CARD32      pad7 B32;
-} xXF86DRICloseFullScreenReply;
-#define sz_xXF86DRICloseFullScreenReply  32
-
-
-#endif /* _XF86DRISTR_H_ */
diff --git a/src/gallium/winsys/Makefile b/src/gallium/winsys/Makefile
index bce5b3f9e0d..e7cb154b84b 100644
--- a/src/gallium/winsys/Makefile
+++ b/src/gallium/winsys/Makefile
@@ -2,7 +2,7 @@
 TOP = ../../..
 include $(TOP)/configs/current
 
-SUBDIRS = $(GALLIUM_WINSYS_DIRS)
+SUBDIRS = $(GALLIUM_WINSYS_DIRS) g3dvl
 
 default install clean:
 	@for dir in $(SUBDIRS) ; do \
diff --git a/src/gallium/winsys/g3dvl/Makefile b/src/gallium/winsys/g3dvl/Makefile
index 424ddea87ad..6c793e0f15b 100644
--- a/src/gallium/winsys/g3dvl/Makefile
+++ b/src/gallium/winsys/g3dvl/Makefile
@@ -1,7 +1,8 @@
+# src/gallium/winsys/g3dvl/Makefile
 TOP = ../../../..
 include $(TOP)/configs/current
 
-SUBDIRS = $(GALLIUM_WINSYS_DIRS)
+SUBDIRS = $(GALLIUM_STATE_TRACKERS_DIRS) $(GALLIUM_WINSYS_DIRS)
 
 default install clean:
 	@for dir in $(SUBDIRS) ; do \
diff --git a/src/gallium/winsys/g3dvl/dri/Makefile b/src/gallium/winsys/g3dvl/dri/Makefile
new file mode 100644
index 00000000000..dcd58ef6d2e
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/dri/Makefile
@@ -0,0 +1,14 @@
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = vldri
+
+LIBRARY_INCLUDES = -I$(TOP)/src/gallium/winsys/g3dvl \
+                   $(shell pkg-config libdrm --cflags-only-I)
+
+C_SOURCES = \
+	driclient.c \
+	XF86dri.c \
+	dri_winsys.c
+
+include ../../../Makefile.template
diff --git a/src/gallium/winsys/g3dvl/dri/XF86dri.c b/src/gallium/winsys/g3dvl/dri/XF86dri.c
new file mode 100644
index 00000000000..9e359a92384
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/dri/XF86dri.c
@@ -0,0 +1,619 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+Copyright 2000 VA Linux Systems, Inc.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Jens Owen <jens@tungstengraphics.com>
+ *   Rickard E. (Rik) Faith <faith@valinux.com>
+ *
+ */
+
+/* THIS IS NOT AN X CONSORTIUM STANDARD */
+
+#define NEED_REPLIES
+#include <X11/Xlibint.h>
+#include <X11/extensions/Xext.h>
+#include <X11/extensions/extutil.h>
+#include "xf86dristr.h"
+
+static XExtensionInfo _xf86dri_info_data;
+static XExtensionInfo *xf86dri_info = &_xf86dri_info_data;
+static char xf86dri_extension_name[] = XF86DRINAME;
+
+#define XF86DRICheckExtension(dpy,i,val) \
+  XextCheckExtension (dpy, i, xf86dri_extension_name, val)
+
+/*****************************************************************************
+ *                                                                           *
+ *			   private utility routines                          *
+ *                                                                           *
+ *****************************************************************************/
+
+static int close_display(Display *dpy, XExtCodes *extCodes); /* body comes from XEXT_GENERATE_CLOSE_DISPLAY further down */
+static /* const */ XExtensionHooks xf86dri_extension_hooks = { /* only the close_display slot is hooked */
+    NULL,				/* create_gc */
+    NULL,				/* copy_gc */
+    NULL,				/* flush_gc */
+    NULL,				/* free_gc */
+    NULL,				/* create_font */
+    NULL,				/* free_font */
+    close_display,			/* close_display */
+    NULL,				/* wire_to_event */
+    NULL,				/* event_to_wire */
+    NULL,				/* error */
+    NULL,				/* error_string */
+};
+
+static XEXT_GENERATE_FIND_DISPLAY (find_display, xf86dri_info, 
+				   xf86dri_extension_name, 
+				   &xf86dri_extension_hooks, 
+				   0, NULL)
+
+static XEXT_GENERATE_CLOSE_DISPLAY (close_display, xf86dri_info)
+
+
+/*****************************************************************************
+ *                                                                           *
+ *		    public XFree86-DRI Extension routines                    *
+ *                                                                           *
+ *****************************************************************************/
+
+#if 0 /* change to 1 to print each call and its outcome on stderr */
+#include <stdio.h>
+#define TRACE(msg)  fprintf(stderr,"XF86DRI%s\n", msg);
+#else
+#define TRACE(msg) /* tracing disabled: expands to nothing */
+#endif
+
+#define PUBLIC /* expands to nothing; only marks the exported entry points */
+
+PUBLIC Bool XF86DRIQueryExtension (dpy, event_basep, error_basep)
+    Display *dpy;
+    int *event_basep, *error_basep;
+{
+    XExtDisplayInfo *info = find_display (dpy);
+
+    TRACE("QueryExtension...");
+    if (XextHasExtension(info)) {
+	*event_basep = info->codes->first_event;
+	*error_basep = info->codes->first_error;
+        TRACE("QueryExtension... return True");
+	return True;
+    } else {
+        TRACE("QueryExtension... return False");
+	return False;
+    }
+}
+
+/*
+ * Fetch the XFree86-DRI protocol version from the server.
+ * The three outputs are written only when a reply arrives; returns False
+ * when the extension is missing or the round trip fails.
+ */
+PUBLIC Bool XF86DRIQueryVersion(Display *dpy, int *majorVersion,
+                                int *minorVersion, int *patchVersion)
+{
+    XExtDisplayInfo *ext = find_display (dpy);
+    xXF86DRIQueryVersionReq *request;
+    xXF86DRIQueryVersionReply reply;
+    Bool answered;
+
+    TRACE("QueryVersion...");
+    XF86DRICheckExtension (dpy, ext, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRIQueryVersion, request);
+    request->reqType = ext->codes->major_opcode;
+    request->driReqType = X_XF86DRIQueryVersion;
+    answered = _XReply(dpy, (xReply *)&reply, 0, xFalse) ? True : False;
+    if (answered) {
+        *majorVersion = reply.majorVersion;
+        *minorVersion = reply.minorVersion;
+        *patchVersion = reply.patchVersion;
+    }
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE(answered ? "QueryVersion... return True" : "QueryVersion... return False");
+    return answered;
+}
+
+/*
+ * Ask the server whether `screen` supports direct rendering.
+ * *isCapable is written only when a reply arrives; returns False when the
+ * extension is missing or the round trip fails.
+ */
+PUBLIC Bool XF86DRIQueryDirectRenderingCapable(Display *dpy, int screen,
+                                               Bool *isCapable)
+{
+    XExtDisplayInfo *ext = find_display (dpy);
+    xXF86DRIQueryDirectRenderingCapableReq *request;
+    xXF86DRIQueryDirectRenderingCapableReply reply;
+    Bool answered;
+
+    TRACE("QueryDirectRenderingCapable...");
+    XF86DRICheckExtension (dpy, ext, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRIQueryDirectRenderingCapable, request);
+    request->reqType = ext->codes->major_opcode;
+    request->driReqType = X_XF86DRIQueryDirectRenderingCapable;
+    request->screen = screen;
+    answered = _XReply(dpy, (xReply *)&reply, 0, xFalse) ? True : False;
+    if (answered)
+        *isCapable = reply.isCapable;
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE(answered ? "QueryDirectRenderingCapable... return True" : "QueryDirectRenderingCapable... return False");
+    return answered;
+}
+
+/* Open the DRI connection: yields the SAREA handle and an Xcalloc'ed bus id (NULL if none). */
+PUBLIC Bool XF86DRIOpenConnection(Display *dpy, int screen,
+                                  drm_handle_t *hSAREA, char **busIdString)
+{
+    XExtDisplayInfo *info = find_display (dpy);
+    xXF86DRIOpenConnectionReply rep;
+    xXF86DRIOpenConnectionReq *req;
+
+    TRACE("OpenConnection...");
+    XF86DRICheckExtension (dpy, info, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRIOpenConnection, req);
+    req->reqType = info->codes->major_opcode;
+    req->driReqType = X_XF86DRIOpenConnection;
+    req->screen = screen;
+    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
+        UnlockDisplay(dpy);
+        SyncHandle();
+        TRACE("OpenConnection... return False");
+        return False;
+    }
+
+    *hSAREA = rep.hSAREALow; /* low half; the high half is merged in for 64-bit handles */
+    if (sizeof(drm_handle_t) == 8) {
+       int shift = 32; /* var to prevent warning on next line */
+       *hSAREA |= ((drm_handle_t) rep.hSAREAHigh) << shift;
+    }
+
+    *busIdString = NULL;
+    if (rep.length) {
+        /* Refuse lengths where "+ 1" or the long-typed read size could overflow. */
+        if (rep.busIdStringLength < 0x7fffffff)
+            *busIdString = (char *)Xcalloc(rep.busIdStringLength + 1, 1);
+        if (!*busIdString) {
+            _XEatData(dpy, ((rep.busIdStringLength+3) & ~3));
+            UnlockDisplay(dpy);
+            SyncHandle();
+            TRACE("OpenConnection... return False");
+            return False;
+        }
+        _XReadPad(dpy, *busIdString, rep.busIdStringLength);
+    }
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE("OpenConnection... return True");
+    return True;
+}
+
+/*
+ * Present a DRM magic token to the server for the given screen.
+ * Returns True only if a reply arrives and reports the token as authenticated.
+ */
+PUBLIC Bool XF86DRIAuthConnection(Display *dpy, int screen, drm_magic_t magic)
+{
+    XExtDisplayInfo *ext = find_display (dpy);
+    xXF86DRIAuthConnectionReq *request;
+    xXF86DRIAuthConnectionReply reply;
+    Bool accepted;
+
+    TRACE("AuthConnection...");
+    XF86DRICheckExtension (dpy, ext, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRIAuthConnection, request);
+    request->reqType = ext->codes->major_opcode;
+    request->driReqType = X_XF86DRIAuthConnection;
+    request->screen = screen;
+    request->magic = magic;
+
+    /* Cleared up front; only a reply from the server can set it. */
+    reply.authenticated = 0;
+    accepted = (_XReply(dpy, (xReply *)&reply, 0, xFalse) &&
+                reply.authenticated) ? True : False;
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE(accepted ? "AuthConnection... return True" : "AuthConnection... return False");
+    return accepted;
+}
+
+/* Queue a CloseConnection request for the screen; no reply is awaited. */
+PUBLIC Bool XF86DRICloseConnection(Display *dpy, int screen)
+{
+    XExtDisplayInfo *ext = find_display (dpy);
+    xXF86DRICloseConnectionReq *request;
+
+    TRACE("CloseConnection...");
+
+    /* Returns False from here when the extension is missing. */
+    XF86DRICheckExtension (dpy, ext, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRICloseConnection, request);
+    request->screen = screen;
+    request->driReqType = X_XF86DRICloseConnection;
+    request->reqType = ext->codes->major_opcode;
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE("CloseConnection... return True");
+    return True;
+}
+
+/* Query the DDX driver version triple and the client driver name for a screen.
+ * *clientDriverName is an Xcalloc'ed, NUL-terminated copy, or NULL when the
+ * reply carries no payload.  Returns False on any failure. */
+PUBLIC Bool XF86DRIGetClientDriverName(Display *dpy, int screen,
+        int *ddxDriverMajorVersion, int *ddxDriverMinorVersion,
+        int *ddxDriverPatchVersion, char **clientDriverName)
+{
+    XExtDisplayInfo *info = find_display (dpy);
+    xXF86DRIGetClientDriverNameReply rep;
+    xXF86DRIGetClientDriverNameReq *req;
+
+    TRACE("GetClientDriverName...");
+    XF86DRICheckExtension (dpy, info, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRIGetClientDriverName, req);
+    req->reqType = info->codes->major_opcode;
+    req->driReqType = X_XF86DRIGetClientDriverName;
+    req->screen = screen;
+    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
+        UnlockDisplay(dpy);
+        SyncHandle();
+        TRACE("GetClientDriverName... return False");
+        return False;
+    }
+
+    *ddxDriverMajorVersion = rep.ddxDriverMajorVersion;
+    *ddxDriverMinorVersion = rep.ddxDriverMinorVersion;
+    *ddxDriverPatchVersion = rep.ddxDriverPatchVersion;
+
+    *clientDriverName = NULL;
+    if (rep.length) {
+        /* Refuse lengths where "+ 1" or the long-typed read size could overflow. */
+        if (rep.clientDriverNameLength < 0x7fffffff)
+            *clientDriverName = (char *)Xcalloc(rep.clientDriverNameLength + 1, 1);
+        if (!*clientDriverName) {
+            _XEatData(dpy, ((rep.clientDriverNameLength+3) & ~3));
+            UnlockDisplay(dpy);
+            SyncHandle();
+            TRACE("GetClientDriverName... return False");
+            return False;
+        }
+        _XReadPad(dpy, *clientDriverName, rep.clientDriverNameLength);
+    }
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE("GetClientDriverName... return True");
+    return True;
+}
+
+/*
+ * Ask the server to create a DRI context for the given config id.
+ * *context receives a freshly allocated XID before the reply is known;
+ * *hHWContext is written only when the reply arrives.  Returns False when
+ * the extension is missing or the round trip fails.
+ */
+PUBLIC Bool XF86DRICreateContextWithConfig(Display *dpy, int screen,
+        int configID, XID *context, drm_context_t *hHWContext)
+{
+    XExtDisplayInfo *ext = find_display (dpy);
+    xXF86DRICreateContextReq *request;
+    xXF86DRICreateContextReply reply;
+    Bool answered;
+
+    TRACE("CreateContext...");
+    XF86DRICheckExtension (dpy, ext, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRICreateContext, request);
+    request->reqType = ext->codes->major_opcode;
+    request->driReqType = X_XF86DRICreateContext;
+    request->visual = configID;
+    request->screen = screen;
+    /* The client picks the context XID and tells the server about it. */
+    *context = XAllocID(dpy);
+    request->context = *context;
+
+    answered = _XReply(dpy, (xReply *)&reply, 0, xFalse) ? True : False;
+    if (answered)
+        *hHWContext = reply.hHWContext;
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE(answered ? "CreateContext... return True" : "CreateContext... return False");
+    return answered;
+}
+
+/* Convenience wrapper around XF86DRICreateContextWithConfig() that uses the
+ * visual's id as the config id. */
+PUBLIC Bool XF86DRICreateContext(Display *dpy, int screen, Visual *visual,
+                                 XID *context, drm_context_t *hHWContext)
+{
+    const int config_id = visual->visualid;
+
+    return XF86DRICreateContextWithConfig(dpy, screen, config_id,
+                                          context, hHWContext);
+}
+
+/* Queue a DestroyContext request for the given context XID; no reply is awaited. */
+PUBLIC Bool XF86DRIDestroyContext(Display *ndpy, int screen, XID context)
+{
+    Display *const dpy = ndpy;
+    XExtDisplayInfo *ext = find_display (dpy);
+    xXF86DRIDestroyContextReq *request;
+
+    TRACE("DestroyContext...");
+    XF86DRICheckExtension (dpy, ext, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRIDestroyContext, request);
+    request->context = context;
+    request->screen = screen;
+    request->driReqType = X_XF86DRIDestroyContext;
+    request->reqType = ext->codes->major_opcode;
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE("DestroyContext... return True");
+    return True;
+}
+
+/* Register an X drawable with DRI for the given screen.
+ * *hHWDrawable is written only when the reply arrives; returns False when
+ * the extension is missing or the round trip fails. */
+PUBLIC Bool XF86DRICreateDrawable(Display *ndpy, int screen,
+                                  Drawable drawable, drm_drawable_t *hHWDrawable)
+{
+    Display *const dpy = ndpy;
+    XExtDisplayInfo *ext = find_display (dpy);
+    xXF86DRICreateDrawableReq *request;
+    xXF86DRICreateDrawableReply reply;
+    Bool answered;
+
+    TRACE("CreateDrawable...");
+    XF86DRICheckExtension (dpy, ext, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRICreateDrawable, request);
+    request->reqType = ext->codes->major_opcode;
+    request->driReqType = X_XF86DRICreateDrawable;
+    request->screen = screen;
+    request->drawable = drawable;
+    answered = _XReply(dpy, (xReply *)&reply, 0, xFalse) ? True : False;
+    if (answered)
+        *hHWDrawable = reply.hHWDrawable;
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE(answered ? "CreateDrawable... return True" : "CreateDrawable... return False");
+    return answered;
+}
+
+/* Queue a DestroyDrawable request for the given X drawable; no reply is awaited. */
+PUBLIC Bool XF86DRIDestroyDrawable(Display *ndpy, int screen, Drawable drawable)
+{
+    Display *const dpy = ndpy;
+    XExtDisplayInfo *ext = find_display (dpy);
+    xXF86DRIDestroyDrawableReq *request;
+
+    TRACE("DestroyDrawable...");
+    XF86DRICheckExtension (dpy, ext, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRIDestroyDrawable, request);
+    request->drawable = drawable;
+    request->screen = screen;
+    request->driReqType = X_XF86DRIDestroyDrawable;
+    request->reqType = ext->codes->major_opcode;
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE("DestroyDrawable... return True");
+    return True;
+}
+
+/* Fetch a drawable's table index/stamp, geometry and front/back clip rects.
+ * Rect arrays are Xcalloc'ed; each is NULL when its count is 0 or allocation fails. */
+PUBLIC Bool XF86DRIGetDrawableInfo(Display* dpy, int screen, Drawable drawable,
+    unsigned int* index, unsigned int* stamp,
+    int* X, int* Y, int* W, int* H,
+    int* numClipRects, drm_clip_rect_t ** pClipRects,
+    int* backX, int* backY,
+    int* numBackClipRects, drm_clip_rect_t ** pBackClipRects )
+{
+    XExtDisplayInfo *info = find_display (dpy);
+    xXF86DRIGetDrawableInfoReply rep;
+    xXF86DRIGetDrawableInfoReq *req;
+    int total_rects;
+
+    TRACE("GetDrawableInfo...");
+    XF86DRICheckExtension (dpy, info, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRIGetDrawableInfo, req);
+    req->reqType = info->codes->major_opcode;
+    req->driReqType = X_XF86DRIGetDrawableInfo;
+    req->screen = screen;
+    req->drawable = drawable;
+    if (!_XReply(dpy, (xReply *)&rep, 1, xFalse)) {
+        UnlockDisplay(dpy);
+        SyncHandle();
+        TRACE("GetDrawableInfo... return False");
+        return False;
+    }
+    *index = rep.drawableTableIndex;
+    *stamp = rep.drawableTableStamp;
+    *X = (int)rep.drawableX;
+    *Y = (int)rep.drawableY;
+    *W = (int)rep.drawableWidth;
+    *H = (int)rep.drawableHeight;
+    *numClipRects = rep.numClipRects;
+    total_rects = *numClipRects;
+    *backX = rep.backX;
+    *backY = rep.backY;
+    *numBackClipRects = rep.numBackClipRects;
+    total_rects += *numBackClipRects;
+
+#if 0
+    /* Because of the fix in Xserver/GL/dri/xf86dri.c, this check breaks
+     * backwards compatibility (Because of the >> 2 shift) but the fix
+     * enables multi-threaded apps to work.
+     */
+    if (rep.length !=  ((((SIZEOF(xXF86DRIGetDrawableInfoReply) -
+                       SIZEOF(xGenericReply) +
+                       total_rects * sizeof(drm_clip_rect_t)) + 3) & ~3) >> 2)) {
+        _XEatData(dpy, rep.length);
+        UnlockDisplay(dpy);
+        SyncHandle();
+        TRACE("GetDrawableInfo... return False");
+        return False;
+    }
+#endif
+
+    if (*numClipRects) {
+       int len = sizeof(drm_clip_rect_t) * (*numClipRects);
+       *pClipRects = (drm_clip_rect_t *)Xcalloc(len, 1);
+       if (*pClipRects)
+          _XRead(dpy, (char*)*pClipRects, len);
+       else
+          _XEatData(dpy, len); /* OOM: drain the rects so the back rects still line up */
+    } else {
+        *pClipRects = NULL;
+    }
+
+    if (*numBackClipRects) {
+       int len = sizeof(drm_clip_rect_t) * (*numBackClipRects);
+       *pBackClipRects = (drm_clip_rect_t *)Xcalloc(len, 1);
+       if (*pBackClipRects)
+          _XRead(dpy, (char*)*pBackClipRects, len);
+       else
+          _XEatData(dpy, len); /* OOM: drain the rects so later replies stay in sync */
+    } else {
+        *pBackClipRects = NULL;
+    }
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE("GetDrawableInfo... return True");
+    return True;
+}
+
+/*
+ * Retrieve the framebuffer description for a screen: DRM handle, origin,
+ * size, stride, and the driver-private blob.  When the reply carries extra
+ * data, *pDevPrivate receives an Xcalloc'ed copy of devPrivateSize bytes;
+ * otherwise it is set to NULL.  Returns False if the extension is missing,
+ * the round trip fails, or the blob cannot be allocated.
+ */
+PUBLIC Bool XF86DRIGetDeviceInfo(Display *dpy, int screen,
+        drm_handle_t *hFrameBuffer, int *fbOrigin, int *fbSize,
+        int *fbStride, int *devPrivateSize, void **pDevPrivate)
+{
+    XExtDisplayInfo *ext = find_display (dpy);
+    xXF86DRIGetDeviceInfoReq *request;
+    xXF86DRIGetDeviceInfoReply reply;
+
+    TRACE("GetDeviceInfo...");
+    XF86DRICheckExtension (dpy, ext, False);
+
+    LockDisplay(dpy);
+    GetReq(XF86DRIGetDeviceInfo, request);
+    request->reqType = ext->codes->major_opcode;
+    request->driReqType = X_XF86DRIGetDeviceInfo;
+    request->screen = screen;
+    if (!_XReply(dpy, (xReply *)&reply, 0, xFalse)) {
+        UnlockDisplay(dpy);
+        SyncHandle();
+        TRACE("GetDeviceInfo... return False");
+        return False;
+    }
+
+    *hFrameBuffer = reply.hFrameBufferLow;
+    if (sizeof(drm_handle_t) == 8) {
+        int high_shift = 32; /* var to prevent warning on next line */
+        *hFrameBuffer |= ((drm_handle_t) reply.hFrameBufferHigh) << high_shift;
+    }
+
+    *fbOrigin = reply.framebufferOrigin;
+    *fbSize = reply.framebufferSize;
+    *fbStride = reply.framebufferStride;
+    *devPrivateSize = reply.devPrivateSize;
+
+    if (!reply.length) {
+        *pDevPrivate = NULL;
+    } else {
+        *pDevPrivate = (void *)Xcalloc(reply.devPrivateSize, 1);
+        if (!*pDevPrivate) {
+            _XEatData(dpy, ((reply.devPrivateSize+3) & ~3));
+            UnlockDisplay(dpy);
+            SyncHandle();
+            TRACE("GetDeviceInfo... return False");
+            return False;
+        }
+        _XRead(dpy, (char*)*pDevPrivate, reply.devPrivateSize);
+    }
+    UnlockDisplay(dpy);
+    SyncHandle();
+    TRACE("GetDeviceInfo... return True");
+    return True;
+}
+
+/*
+ * Deprecated entry point, as is the X protocol request behind it.
+ * Sends nothing to the server and always returns False.
+ */
+PUBLIC Bool XF86DRIOpenFullScreen(Display *dpy, int screen, Drawable drawable)
+{
+    /* All arguments are intentionally ignored. */
+    (void) drawable;
+    (void) screen;
+    (void) dpy;
+    return False;
+}
+
+/*
+ * Deprecated entry point, as is the X protocol request behind it.
+ * Sends nothing to the server and always returns True.
+ */
+PUBLIC Bool XF86DRICloseFullScreen(Display *dpy, int screen, Drawable drawable)
+{
+    /* All arguments are intentionally ignored. */
+    (void) drawable;
+    (void) screen;
+    (void) dpy;
+    return True;
+}
+
diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
new file mode 100644
index 00000000000..257aa0a1201
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -0,0 +1,393 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <vl_winsys.h>
+#include <driclient.h>
+#include <state_tracker/dri1_api.h>
+#include <pipe/p_video_context.h>
+#include <pipe/p_state.h>
+#include <util/u_memory.h>
+
+struct vl_dri_screen
+{
+   struct vl_screen base; /* kept first: functions here cast vl_screen* to vl_dri_screen* */
+   Visual *visual; /* XDefaultVisual() of the screen; passed to driCreateContext() */
+   struct drm_api *api; /* result of drm_api_create() */
+   dri_screen_t *dri_screen; /* filled in by driCreateScreen() */
+   dri_framebuffer_t dri_framebuf; /* its private blob is handed to the driver as ddx_info */
+   struct dri1_api *api_hooks; /* arg.api as left behind by create_screen() */
+};
+
+struct vl_dri_context
+{
+   struct vl_context base; /* kept first: functions here cast vl_context* to vl_dri_context* */
+   boolean is_locked; /* TRUE between vl_dri_lock() and vl_dri_unlock() */
+   boolean lost_lock; /* set when vl_dri_lock() had to fall back to drmGetLock() */
+   drmLock *lock; /* points at the lock inside the owning screen's SAREA */
+   dri_context_t *dri_context; /* from driCreateContext(); supplies drm_context */
+   int fd; /* DRM fd copied from the dri_screen */
+   struct pipe_video_context *vpipe; /* NOTE(review): never assigned in this file; base.vpipe is used instead */
+   dri_drawable_t *drawable; /* presentation target; NULL until vl_video_bind_drawable() */
+};
+
+/* dri1 lock hook: take the DRM hardware lock for the vl_dri_context passed as priv. */
+static void
+vl_dri_lock(void *priv)
+{
+   struct vl_dri_context *ctx = priv;
+   drm_context_t hw_ctx;
+   char contended = 0;
+
+   assert(priv);
+   hw_ctx = ctx->dri_context->drm_context;
+   /* Try DRM_CAS first; when it reports failure, go through drmGetLock(). */
+   DRM_CAS(ctx->lock, hw_ctx, DRM_LOCK_HELD | hw_ctx, contended);
+   if (contended) {
+      drmGetLock(ctx->fd, hw_ctx, 0);
+      ctx->lost_lock = TRUE;
+   }
+   ctx->is_locked = TRUE;
+}
+
+/* dri1 unlock hook: clear the held flag, then release the DRM hardware lock. */
+static void
+vl_dri_unlock(void *priv)
+{
+   struct vl_dri_context *ctx = priv;
+   drm_context_t hw_ctx;
+
+   assert(priv);
+   hw_ctx = ctx->dri_context->drm_context;
+
+   ctx->is_locked = FALSE;
+   DRM_UNLOCK(ctx->fd, ctx->lock, hw_ctx);
+}
+
+/* dri1 hook: report the is_locked flag maintained by vl_dri_lock()/vl_dri_unlock(). */
+static boolean
+vl_dri_is_locked(void *priv)
+{
+   const struct vl_dri_context *ctx = priv;
+
+   assert(priv);
+   return ctx->is_locked;
+}
+
+/* dri1 hook: report whether the lock was flagged as lost since the flag was last cleared. */
+static boolean
+vl_dri_lost_lock(void *priv)
+{
+   const struct vl_dri_context *ctx = priv;
+
+   assert(priv);
+   return ctx->lost_lock;
+}
+
+/* dri1 hook: acknowledge a lost lock by resetting the flag. */
+static void
+vl_dri_clear_lost_lock(void *priv)
+{
+   struct vl_dri_context *ctx = priv;
+
+   assert(priv);
+   ctx->lost_lock = FALSE;
+}
+
+struct dri1_api_lock_funcs dri1_lf = /* lock callbacks passed to the driver as arg.lf in vl_screen_create() */
+{
+   .lock = vl_dri_lock,
+   .unlock = vl_dri_unlock,
+   .is_locked = vl_dri_is_locked,
+   .is_lock_lost = vl_dri_lost_lock,
+   .clear_lost_lock = vl_dri_clear_lost_lock
+};
+
+/* Copy a driclient version triple (major/minor/patch) into the dri1 API's version struct. */
+static void vl_dri_copy_version(struct dri1_api_version *dst, dri_version_t *src)
+{
+   assert(src);
+   assert(dst);
+   dst->patch_level = src->patch;
+   dst->minor = src->minor;
+   dst->major = src->major;
+}
+
+/*
+ * Intersect cliprect `src` with `bbox` translated by (dst_x, dst_y) and store
+ * the overlap in `dst`.  Returns FALSE when the overlap is empty or starts at
+ * a negative coordinate; dst's x fields may already be written in that case.
+ */
+static boolean
+vl_dri_intersect_src_bbox(struct drm_clip_rect *dst, int dst_x, int dst_y,
+                          const struct drm_clip_rect *src, const struct drm_clip_rect *bbox)
+{
+   int lo, hi;
+
+   assert(dst);
+   assert(src);
+   assert(bbox);
+
+   /* Horizontal: larger of the two left edges, smaller of the two right edges. */
+   lo = ((int)bbox->x1 + dst_x < (int)src->x1) ? src->x1 : (int)bbox->x1 + dst_x;
+   hi = ((int)bbox->x2 + dst_x > (int)src->x2) ? src->x2 : (int)bbox->x2 + dst_x;
+   if (lo < 0 || lo >= hi)
+      return FALSE;
+   dst->x1 = lo;
+   dst->x2 = hi;
+
+   /* Vertical: same rule on the y edges. */
+   lo = ((int)bbox->y1 + dst_y < (int)src->y1) ? src->y1 : (int)bbox->y1 + dst_y;
+   hi = ((int)bbox->y2 + dst_y > (int)src->y2) ? src->y2 : (int)bbox->y2 + dst_y;
+   if (lo < 0 || lo >= hi)
+      return FALSE;
+   dst->y1 = lo;
+   dst->y2 = hi;
+   return TRUE;
+}
+
+/*
+ * Copy `src` onto `dst` once per cliprect of the bound drawable, limited to
+ * the part of each cliprect that overlaps src_bbox (drawable-relative coords).
+ */
+static void
+vl_clip_copy(struct vl_dri_context *vl_dri_ctx, struct pipe_surface *dst,
+             struct pipe_surface *src, const struct drm_clip_rect *src_bbox)
+{
+   struct pipe_video_context *vpipe;
+   struct drm_clip_rect clip;
+   struct drm_clip_rect *cur;
+   int i;
+
+   assert(vl_dri_ctx);
+   assert(dst);
+   assert(src);
+   assert(src_bbox);
+   assert(vl_dri_ctx->drawable->cliprects);
+   assert(vl_dri_ctx->drawable->num_cliprects > 0);
+
+   /* Read through the context only after it has been asserted non-NULL. */
+   vpipe = vl_dri_ctx->base.vpipe;
+   cur = vl_dri_ctx->drawable->cliprects;
+
+   for (i = 0; i < vl_dri_ctx->drawable->num_cliprects; ++i) {
+      if (vl_dri_intersect_src_bbox(&clip, vl_dri_ctx->drawable->x, vl_dri_ctx->drawable->y, cur++, src_bbox))
+         vpipe->surface_copy(vpipe, dst, clip.x1, clip.y1, src,
+                             (int)clip.x1 - vl_dri_ctx->drawable->x,
+                             (int)clip.y1 - vl_dri_ctx->drawable->y,
+                             clip.x2 - clip.x1, clip.y2 - clip.y1);
+   }
+}
+
+/* If the lock was flagged as lost, clear the flag and revalidate the drawable info. */
+static void
+vl_dri_update_drawables_locked(struct vl_dri_context *vl_dri_ctx)
+{
+   struct vl_dri_screen *scrn;
+
+   assert(vl_dri_ctx);
+
+   if (!vl_dri_ctx->lost_lock)
+      return;
+   scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
+   vl_dri_ctx->lost_lock = FALSE;
+   DRI_VALIDATE_DRAWABLE_INFO(scrn->dri_screen, vl_dri_ctx->drawable);
+}
+
+/* flush_frontbuffer hook: under the DRM lock, copy `surf` to the front surface via the drawable's cliprects. */
+static void
+vl_dri_flush_frontbuffer(struct pipe_screen *screen,
+                         struct pipe_surface *surf, void *context_private)
+{
+   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)context_private;
+   struct vl_dri_screen *vl_dri_scrn;
+   struct drm_clip_rect src_bbox;
+   boolean save_lost_lock = FALSE;
+
+   assert(screen);
+   assert(surf);
+   assert(context_private);
+
+   /* No target yet: drawable stays NULL until vl_video_bind_drawable() has run. */
+   if (!vl_dri_ctx->drawable)
+      return;
+
+   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
+   vl_dri_lock(vl_dri_ctx);
+
+   save_lost_lock = vl_dri_ctx->lost_lock; /* the update below clears it; restored before unlocking */
+   vl_dri_update_drawables_locked(vl_dri_ctx);
+
+   src_bbox.x1 = 0;
+   src_bbox.x2 = vl_dri_ctx->drawable->w;
+   src_bbox.y1 = 0;
+   src_bbox.y2 = vl_dri_ctx->drawable->h;
+
+#if 0
+   if (vl_dri_scrn->_api_hooks->present_locked)
+      vl_dri_scrn->api_hooks->present_locked(pipe, surf,
+                                             vl_dri_ctx->drawable->cliprects,
+                                             vl_dri_ctx->drawable->num_cliprects,
+                                             vl_dri_ctx->drawable->x, vl_dri_drawable->y,
+                                             &bbox, NULL /*fence*/);
+   else
+#endif
+   if (vl_dri_scrn->api_hooks->front_srf_locked) {
+      struct pipe_surface *front = vl_dri_scrn->api_hooks->front_srf_locked(screen);
+      if (front)
+         vl_clip_copy(vl_dri_ctx, front, surf, &src_bbox);
+      //st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, fence);
+   }
+
+   vl_dri_ctx->lost_lock = save_lost_lock;
+   vl_dri_unlock(vl_dri_ctx);
+}
+
+/* Bind an X drawable as vctx's presentation target; returns the previous one (None if none).
+ * The existing binding is kept when no DRI drawable could be obtained. */
+Drawable
+vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable)
+{
+   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)vctx;
+   struct vl_dri_screen *vl_dri_scrn;
+   dri_drawable_t *dri_drawable = NULL; /* stays NULL if driCreateDrawable() stores nothing */
+   Drawable old_drawable = None;
+
+   assert(vctx);
+   if (vl_dri_ctx->drawable)
+      old_drawable = vl_dri_ctx->drawable->x_drawable;
+   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
+   driCreateDrawable(vl_dri_scrn->dri_screen, drawable, &dri_drawable);
+   if (dri_drawable)
+      vl_dri_ctx->drawable = dri_drawable;
+   return old_drawable;
+}
+
+/* Create the DRI1-backed vl_screen for an X display/screen; returns NULL on failure. */
+struct vl_screen*
+vl_screen_create(Display *display, int screen)
+{
+   struct vl_dri_screen *vl_dri_scrn;
+   struct dri1_create_screen_arg arg;
+
+   assert(display);
+   vl_dri_scrn = CALLOC_STRUCT(vl_dri_screen);
+   if (!vl_dri_scrn)
+      return NULL;
+
+   driCreateScreen(display, screen, &vl_dri_scrn->dri_screen, &vl_dri_scrn->dri_framebuf);
+   if (!vl_dri_scrn->dri_screen) { /* still NULL from calloc: creation failed, and it is dereferenced below */
+      FREE(vl_dri_scrn);
+      return NULL;
+   }
+   vl_dri_scrn->api = drm_api_create();
+
+   arg.base.mode = DRM_CREATE_DRI1;
+   arg.lf = &dri1_lf;
+   arg.ddx_info = vl_dri_scrn->dri_framebuf.private;
+   arg.ddx_info_size = vl_dri_scrn->dri_framebuf.private_size;
+   arg.sarea = vl_dri_scrn->dri_screen->sarea;
+   vl_dri_copy_version(&arg.ddx_version, &vl_dri_scrn->dri_screen->ddx);
+   vl_dri_copy_version(&arg.dri_version, &vl_dri_scrn->dri_screen->dri);
+   vl_dri_copy_version(&arg.drm_version, &vl_dri_scrn->dri_screen->drm);
+   arg.api = NULL;
+   vl_dri_scrn->base.pscreen = vl_dri_scrn->api->create_screen(vl_dri_scrn->api,
+                                  vl_dri_scrn->dri_screen->fd, &arg.base);
+   if (!vl_dri_scrn->base.pscreen) {
+      driDestroyScreen(vl_dri_scrn->dri_screen); /* do not leak the DRI screen */
+      FREE(vl_dri_scrn);
+      return NULL;
+   }
+   vl_dri_scrn->visual = XDefaultVisual(display, screen);
+   vl_dri_scrn->api_hooks = arg.api;
+   vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri_flush_frontbuffer;
+   /* XXX: Safe to call this while unlocked? */
+   vl_dri_scrn->base.format = vl_dri_scrn->api_hooks->front_srf_locked(vl_dri_scrn->base.pscreen)->format;
+   return &vl_dri_scrn->base;
+}
+
+/* Tear down a screen from vl_screen_create(): pipe screen, DRI screen, then the wrapper. */
+void vl_screen_destroy(struct vl_screen *vscreen)
+{
+   struct vl_dri_screen *scrn = (struct vl_dri_screen*)vscreen;
+
+   assert(vscreen);
+   scrn->base.pscreen->destroy(scrn->base.pscreen);
+   driDestroyScreen(scrn->dri_screen);
+   FREE(scrn);
+}
+
+/* Create a video context (DRI context + pipe video context) on vscreen; returns NULL on failure. */
+struct vl_context*
+vl_video_create(struct vl_screen *vscreen,
+                enum pipe_video_profile profile,
+                enum pipe_video_chroma_format chroma_format,
+                unsigned width, unsigned height)
+{
+   struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vscreen;
+   struct vl_dri_context *vl_dri_ctx;
+
+   if (!vl_dri_scrn->api->create_video_context) { /* checked first so nothing needs undoing */
+      debug_printf("[G3DVL] No video support found on %s/%s.\n",
+                   vl_dri_scrn->base.pscreen->get_vendor(vl_dri_scrn->base.pscreen),
+                   vl_dri_scrn->base.pscreen->get_name(vl_dri_scrn->base.pscreen));
+      return NULL;
+   }
+
+   vl_dri_ctx = CALLOC_STRUCT(vl_dri_context);
+   if (!vl_dri_ctx)
+      return NULL;
+   /* XXX: Is default visual correct/sufficient here? */
+   driCreateContext(vl_dri_scrn->dri_screen, vl_dri_scrn->visual, &vl_dri_ctx->dri_context);
+   if (!vl_dri_ctx->dri_context) { /* still NULL from calloc; the lock hooks dereference it */
+      FREE(vl_dri_ctx);
+      return NULL;
+   }
+
+   vl_dri_ctx->base.vpipe = vl_dri_scrn->api->create_video_context(vl_dri_scrn->api,
+                                                                   vscreen->pscreen, profile,
+                                                                   chroma_format, width, height);
+   if (!vl_dri_ctx->base.vpipe) { /* NOTE(review): dri_context is not released here or in vl_video_destroy() */
+      FREE(vl_dri_ctx);
+      return NULL;
+   }
+
+   vl_dri_ctx->base.vpipe->priv = vl_dri_ctx;
+   vl_dri_ctx->base.vscreen = vscreen;
+   vl_dri_ctx->fd = vl_dri_scrn->dri_screen->fd;
+   vl_dri_ctx->lock = (drmLock*)&vl_dri_scrn->dri_screen->sarea->lock;
+   return &vl_dri_ctx->base;
+}
+
+/* Destroy vctx's pipe video context and free the wrapper; dri_context and drawable are not released here. */
+void vl_video_destroy(struct vl_context *vctx)
+{
+   struct vl_dri_context *ctx = (struct vl_dri_context*)vctx;
+
+   assert(vctx);
+   ctx->base.vpipe->destroy(ctx->base.vpipe);
+   FREE(ctx);
+}
diff --git a/src/gallium/winsys/g3dvl/dri/driclient.c b/src/gallium/winsys/g3dvl/dri/driclient.c
new file mode 100644
index 00000000000..dc2189afec3
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/dri/driclient.c
@@ -0,0 +1,310 @@
+#include "driclient.h"
+#include <assert.h>
+#include <stdlib.h>
+
+/* Open the DRI connection for display/screen and fill a new dri_screen_t (plus dri_framebuf if non-NULL); returns 0 on success, 1 on failure. */
+int driCreateScreen(Display *display, int screen, dri_screen_t **dri_screen, dri_framebuffer_t *dri_framebuf)
+{
+	int		evbase, errbase;
+	char		*driver_name;
+	int		newly_opened;
+	drm_magic_t	magic;
+	drmVersionPtr	drm_version;
+	drm_handle_t	sarea_handle;
+	char		*bus_id;
+	dri_screen_t	*dri_scrn;
+
+	assert(display);
+	assert(dri_screen);
+
+	if (!XF86DRIQueryExtension(display, &evbase, &errbase))
+		return 1;
+
+	dri_scrn = calloc(1, sizeof(dri_screen_t));
+	if (!dri_scrn)
+		return 1;
+
+	if (!XF86DRIQueryVersion(display, &dri_scrn->dri.major, &dri_scrn->dri.minor, &dri_scrn->dri.patch))
+		goto free_screen;
+
+	dri_scrn->display = display;
+	dri_scrn->num = screen;
+	dri_scrn->draw_lock_id = 1;
+
+	if (!XF86DRIOpenConnection(display, screen, &sarea_handle, &bus_id))
+		goto free_screen;
+
+	dri_scrn->fd = drmOpenOnce(NULL, bus_id, &newly_opened);
+	XFree(bus_id);
+
+	if (dri_scrn->fd < 0)
+		goto close_connection;
+
+	if (drmGetMagic(dri_scrn->fd, &magic))
+		goto close_drm;
+
+	drm_version = drmGetVersion(dri_scrn->fd);
+
+	if (!drm_version)
+		goto close_drm;
+
+	dri_scrn->drm.major = drm_version->version_major;
+	dri_scrn->drm.minor = drm_version->version_minor;
+	dri_scrn->drm.patch = drm_version->version_patchlevel;
+	drmFreeVersion(drm_version);
+
+	if (!XF86DRIAuthConnection(display, screen, magic))
+		goto close_drm;
+
+	if (!XF86DRIGetClientDriverName
+	(
+		display,
+		screen,
+		&dri_scrn->ddx.major,
+		&dri_scrn->ddx.minor,
+		&dri_scrn->ddx.patch,
+		&driver_name
+	))
+		goto close_drm;
+	XFree(driver_name);	/* only the DDX version is kept; the name string was previously leaked */
+
+	if (drmMap(dri_scrn->fd, sarea_handle, SAREA_MAX, (drmAddress)&dri_scrn->sarea))
+		goto close_drm;
+
+	dri_scrn->drawable_hash = drmHashCreate();
+
+	if (!dri_scrn->drawable_hash)
+		goto unmap_sarea;
+
+	if (dri_framebuf)
+	{
+		if (!XF86DRIGetDeviceInfo
+		(
+			display,
+			screen, &dri_framebuf->drm_handle,
+			&dri_framebuf->base,
+			&dri_framebuf->size,
+			&dri_framebuf->stride,
+			&dri_framebuf->private_size,
+			&dri_framebuf->private
+		))
+			goto destroy_hash;
+	}
+
+	*dri_screen = dri_scrn;
+
+	return 0;
+
+destroy_hash:	/* error unwinding: each label undoes one acquisition, newest first */
+	drmHashDestroy(dri_scrn->drawable_hash);
+unmap_sarea:
+	drmUnmap(dri_scrn->sarea, SAREA_MAX);
+close_drm:
+	drmCloseOnce(dri_scrn->fd);
+close_connection:
+	XF86DRICloseConnection(display, screen);
+free_screen:
+	free(dri_scrn);
+
+	return 1;
+}
+
+int driDestroyScreen(dri_screen_t *dri_screen)
+{
+	Drawable	key;
+	dri_drawable_t	*entry;
+	int		more;
+
+	assert(dri_screen);
+
+	/* Force refcount to 1 so driDestroyDrawable() really frees every
+	 * drawable still registered with this screen.
+	 */
+	more = drmHashFirst(dri_screen->drawable_hash, &key, (void**)&entry);
+	while (more)
+	{
+		entry->refcount = 1;
+		driDestroyDrawable(entry);
+		more = drmHashNext(dri_screen->drawable_hash, &key, (void**)&entry);
+	}
+
+	drmHashDestroy(dri_screen->drawable_hash);
+	drmUnmap(dri_screen->sarea, SAREA_MAX);
+	drmCloseOnce(dri_screen->fd);
+	XF86DRICloseConnection(dri_screen->display, dri_screen->num);
+	free(dri_screen);
+
+	return 0;
+}
+
+/* Return (creating it on first use) the refcounted dri_drawable_t tracking 'drawable' on dri_screen; 0 on success, 1 on failure. */
+int driCreateDrawable(dri_screen_t *dri_screen, Drawable drawable, dri_drawable_t **dri_drawable)
+{
+	int		evbase, errbase;
+	dri_drawable_t	*dri_draw;
+
+	assert(dri_screen);
+	assert(dri_drawable);
+
+	if (!XF86DRIQueryExtension(dri_screen->display, &evbase, &errbase))
+		return 1;
+
+	if (!drmHashLookup(dri_screen->drawable_hash, drawable, (void**)dri_drawable))
+	{
+		/* Already tracked: hand out another reference to the existing entry */
+		(*dri_drawable)->refcount++;
+		return 0;
+	}
+
+	dri_draw = calloc(1, sizeof(dri_drawable_t));
+	if (!dri_draw)
+		return 1;
+
+	if (!XF86DRICreateDrawable(dri_screen->display, dri_screen->num, drawable, &dri_draw->drm_drawable))
+	{
+		free(dri_draw);
+		return 1;
+	}
+
+	dri_draw->x_drawable = drawable;
+	dri_draw->sarea_index = 0;
+	dri_draw->sarea_stamp = NULL;
+	dri_draw->last_sarea_stamp = 0;
+	dri_draw->dri_screen = dri_screen;
+	dri_draw->refcount = 1;
+
+	if (drmHashInsert(dri_screen->drawable_hash, drawable, dri_draw))
+	{
+		XF86DRIDestroyDrawable(dri_screen->display, dri_screen->num, drawable);
+		free(dri_draw);
+		return 1;
+	}
+
+	if (!dri_draw->sarea_stamp || *dri_draw->sarea_stamp != dri_draw->last_sarea_stamp)
+	{
+		DRM_SPINLOCK(&dri_screen->sarea->drawable_lock, dri_screen->draw_lock_id);
+		if (driUpdateDrawableInfo(dri_draw))
+		{
+			drmHashDelete(dri_screen->drawable_hash, drawable);	/* don't leave a dangling entry for the freed drawable */
+			XF86DRIDestroyDrawable(dri_screen->display, dri_screen->num, drawable);
+			free(dri_draw);
+			DRM_SPINUNLOCK(&dri_screen->sarea->drawable_lock, dri_screen->draw_lock_id);
+			return 1;
+		}
+
+		DRM_SPINUNLOCK(&dri_screen->sarea->drawable_lock, dri_screen->draw_lock_id);
+	}
+
+	*dri_drawable = dri_draw;
+
+	return 0;
+}
+
+/* Refetch geometry and cliprects for dri_drawable. Call with the drawable spinlock held; it is held again on return (0 = ok, 1 = failed). */
+int driUpdateDrawableInfo(dri_drawable_t *dri_drawable)
+{
+	assert(dri_drawable);
+
+	if (dri_drawable->cliprects)
+	{
+		XFree(dri_drawable->cliprects);
+		dri_drawable->cliprects = NULL;
+	}
+	if (dri_drawable->back_cliprects)
+	{
+		XFree(dri_drawable->back_cliprects);
+		dri_drawable->back_cliprects = NULL;
+	}
+
+	DRM_SPINUNLOCK(&dri_drawable->dri_screen->sarea->drawable_lock, dri_drawable->dri_screen->draw_lock_id);	/* lock is dropped for the duration of the X request */
+
+	if (!XF86DRIGetDrawableInfo
+	(
+		dri_drawable->dri_screen->display,
+		dri_drawable->dri_screen->num,
+		dri_drawable->x_drawable,
+    		&dri_drawable->sarea_index,
+    		&dri_drawable->last_sarea_stamp,
+    		&dri_drawable->x,
+    		&dri_drawable->y,
+    		&dri_drawable->w,
+    		&dri_drawable->h,
+		&dri_drawable->num_cliprects,
+		&dri_drawable->cliprects,
+		&dri_drawable->back_x,
+		&dri_drawable->back_y,
+		&dri_drawable->num_back_cliprects,
+		&dri_drawable->back_cliprects
+	))
+	{
+		dri_drawable->sarea_stamp = &dri_drawable->last_sarea_stamp;	/* stamp now always equals the cached one */
+		dri_drawable->num_cliprects = 0;
+		dri_drawable->cliprects = NULL;
+		dri_drawable->num_back_cliprects = 0;
+		dri_drawable->back_cliprects = NULL;
+		DRM_SPINLOCK(&dri_drawable->dri_screen->sarea->drawable_lock, dri_drawable->dri_screen->draw_lock_id);	/* callers unlock after we return, so fail with the lock held too */
+		return 1;
+	}
+	dri_drawable->sarea_stamp = &dri_drawable->dri_screen->sarea->drawableTable[dri_drawable->sarea_index].stamp;
+
+	DRM_SPINLOCK(&dri_drawable->dri_screen->sarea->drawable_lock, dri_drawable->dri_screen->draw_lock_id);
+
+	return 0;
+}
+
+int driDestroyDrawable(dri_drawable_t *dri_drawable)
+{
+	assert(dri_drawable);
+
+	/* Drop one reference; only the last one tears the drawable down. */
+	if (--dri_drawable->refcount != 0)
+		return 0;
+
+	if (dri_drawable->cliprects)
+		XFree(dri_drawable->cliprects);
+	if (dri_drawable->back_cliprects)
+		XFree(dri_drawable->back_cliprects);
+	drmHashDelete(dri_drawable->dri_screen->drawable_hash, dri_drawable->x_drawable);
+	XF86DRIDestroyDrawable(dri_drawable->dri_screen->display, dri_drawable->dri_screen->num, dri_drawable->x_drawable);
+	free(dri_drawable);
+	return 0;
+}
+
+int driCreateContext(dri_screen_t *dri_screen, Visual *visual, dri_context_t **dri_context)
+{
+	int		event_base, error_base;
+	dri_context_t	*ctx;
+
+	assert(dri_screen);
+	assert(visual);
+	assert(dri_context);
+
+	/* Without the DRI extension on this display no context can be created. */
+	if (!XF86DRIQueryExtension(dri_screen->display, &event_base, &error_base))
+		return 1;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		return 1;
+
+	if (XF86DRICreateContext(dri_screen->display, dri_screen->num, visual, &ctx->id, &ctx->drm_context))
+	{
+		/* Success: bind the context to its screen and hand it to the caller. */
+		ctx->dri_screen = dri_screen;
+		*dri_context = ctx;
+		return 0;
+	}
+
+	free(ctx);
+	return 1;
+}
+
+int driDestroyContext(dri_context_t *dri_context)
+{
+	assert(dri_context);
+
+	XF86DRIDestroyContext(dri_context->dri_screen->display, dri_context->dri_screen->num, dri_context->id);	/* result is ignored */
+	free(dri_context);	/* releases the wrapper allocated by driCreateContext() */
+
+	return 0;	/* no failure is ever reported */
+}
diff --git a/src/gallium/winsys/g3dvl/dri/driclient.h b/src/gallium/winsys/g3dvl/dri/driclient.h
new file mode 100644
index 00000000000..d3915250392
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/dri/driclient.h
@@ -0,0 +1,97 @@
+#ifndef driclient_h
+#define driclient_h
+
+#include <stdint.h>
+#include <X11/Xlib.h>
+#include <drm_sarea.h>
+#include "xf86dri.h"
+
+/* TODO: Bring in DRI XML options */
+
+typedef struct dri_version	/* major.minor.patch triple; dri_screen keeps one each for DDX, DRI and DRM */
+{
+	int major;
+	int minor;
+	int patch;
+} dri_version_t;
+
+typedef struct dri_screen
+{
+	Display			*display;	/* X display given to driCreateScreen() */
+	unsigned int		num;		/* X screen number given to driCreateScreen() */
+	dri_version_t		ddx, dri, drm;	/* from XF86DRIGetClientDriverName(), XF86DRIQueryVersion() and drmGetVersion() */
+	int			draw_lock_id;	/* id used with DRM_SPINLOCK on sarea->drawable_lock; driCreateScreen() sets it to 1 */
+	int			fd;		/* DRM file descriptor from drmOpenOnce() */
+	drm_sarea_t		*sarea;		/* SAREA mapped with drmMap() (SAREA_MAX bytes) */
+	void			*drawable_hash;	/* drmHash table mapping X Drawable -> dri_drawable_t */
+	void			*private;	/* not touched by driclient.c */
+} dri_screen_t;
+
+struct dri_context;
+
+typedef struct dri_drawable
+{
+	drm_drawable_t		drm_drawable;	/* handle filled in by XF86DRICreateDrawable() */
+	Drawable		x_drawable;	/* X drawable; also the key in dri_screen->drawable_hash */
+	unsigned int		sarea_index;	/* index into sarea->drawableTable, from XF86DRIGetDrawableInfo() */
+	unsigned int		*sarea_stamp;	/* NULL until the first update; then the SAREA stamp, or &last_sarea_stamp after a failed update */
+	unsigned int		last_sarea_stamp;	/* stamp returned by the last XF86DRIGetDrawableInfo() */
+	int			x, y, w, h;	/* geometry from XF86DRIGetDrawableInfo() */
+	int			back_x, back_y;	/* back-buffer origin from XF86DRIGetDrawableInfo() */
+	int			num_cliprects, num_back_cliprects;	/* entry counts of the two arrays below */
+	drm_clip_rect_t		*cliprects, *back_cliprects;	/* returned by XF86DRIGetDrawableInfo(); released with XFree() */
+	dri_screen_t		*dri_screen;	/* screen this drawable is registered with */
+	unsigned int		refcount;	/* references handed out by driCreateDrawable(); freed when it reaches 0 */
+	void			*private;	/* not touched by driclient.c */
+} dri_drawable_t;
+
+typedef struct dri_context
+{
+	XID			id;		/* context XID from XF86DRICreateContext(); passed to XF86DRIDestroyContext() */
+	drm_context_t		drm_context;	/* hHWContext value returned by XF86DRICreateContext() */
+	dri_screen_t		*dri_screen;	/* screen the context was created on */
+	void			*private;	/* not touched by driclient.c */
+} dri_context_t;
+
+typedef struct dri_framebuffer	/* filled by XF86DRIGetDeviceInfo() when passed to driCreateScreen() */
+{
+	drm_handle_t		drm_handle;	/* hFrameBuffer */
+	int			base, size, stride;	/* fbOrigin, fbSize, fbStride */
+	int			private_size;	/* devPrivateSize */
+	void			*private;	/* pDevPrivate */
+} dri_framebuffer_t;
+
+int driCreateScreen(Display *display, int screen, dri_screen_t **dri_screen, dri_framebuffer_t *dri_framebuf);
+int driDestroyScreen(dri_screen_t *dri_screen);
+int driCreateDrawable(dri_screen_t *dri_screen, Drawable drawable, dri_drawable_t **dri_drawable);
+int driUpdateDrawableInfo(dri_drawable_t *dri_drawable);
+int driDestroyDrawable(dri_drawable_t *dri_drawable);
+int driCreateContext(dri_screen_t *dri_screen, Visual *visual, dri_context_t **dri_context);
+int driDestroyContext(dri_context_t *dri_context);
+
+#define DRI_VALIDATE_DRAWABLE_INFO_ONCE(dri_drawable)					\
+do	/* refetch drawable info only when the SAREA stamp differs from the cached one */	\
+{											\
+	if (*((dri_drawable)->sarea_stamp) != (dri_drawable)->last_sarea_stamp)		\
+		driUpdateDrawableInfo(dri_drawable);					\
+} while (0)
+
+#define DRI_VALIDATE_DRAWABLE_INFO(dri_screen, dri_drawable)					\
+do	/* repeat until the cached stamp matches; the hardware lock is released around each refresh */	\
+{												\
+	while (*((dri_drawable)->sarea_stamp) != (dri_drawable)->last_sarea_stamp)		\
+	{											\
+		register unsigned int hwContext = (dri_screen)->sarea->lock.lock &		\
+		~(DRM_LOCK_HELD | DRM_LOCK_CONT);						\
+		DRM_UNLOCK((dri_screen)->fd, &(dri_screen)->sarea->lock, hwContext);		\
+												\
+		DRM_SPINLOCK(&(dri_screen)->sarea->drawable_lock, (dri_screen)->draw_lock_id);	\
+		DRI_VALIDATE_DRAWABLE_INFO_ONCE((dri_drawable));				\
+		DRM_SPINUNLOCK(&(dri_screen)->sarea->drawable_lock, (dri_screen)->draw_lock_id);	\
+												\
+		DRM_LIGHT_LOCK((dri_screen)->fd, &(dri_screen)->sarea->lock, hwContext);	\
+	}											\
+} while (0)
+
+#endif
+
diff --git a/src/gallium/winsys/g3dvl/dri/xf86dri.h b/src/gallium/winsys/g3dvl/dri/xf86dri.h
new file mode 100644
index 00000000000..baf80a7a9dd
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/dri/xf86dri.h
@@ -0,0 +1,119 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+Copyright 2000 VA Linux Systems, Inc.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file xf86dri.h
+ * Protocol numbers and function prototypes for DRI X protocol.
+ *
+ * \author Kevin E. Martin <martin@valinux.com>
+ * \author Jens Owen <jens@tungstengraphics.com>
+ * \author Rickard E. (Rik) Faith <faith@valinux.com>
+ */
+
+#ifndef _XF86DRI_H_
+#define _XF86DRI_H_
+
+#include <X11/Xfuncproto.h>
+#include <xf86drm.h>
+
+#define X_XF86DRIQueryVersion			0
+#define X_XF86DRIQueryDirectRenderingCapable	1
+#define X_XF86DRIOpenConnection			2
+#define X_XF86DRICloseConnection		3
+#define X_XF86DRIGetClientDriverName		4
+#define X_XF86DRICreateContext			5
+#define X_XF86DRIDestroyContext			6
+#define X_XF86DRICreateDrawable			7
+#define X_XF86DRIDestroyDrawable		8
+#define X_XF86DRIGetDrawableInfo		9
+#define X_XF86DRIGetDeviceInfo			10
+#define X_XF86DRIAuthConnection                 11
+#define X_XF86DRIOpenFullScreen                 12   /* Deprecated */
+#define X_XF86DRICloseFullScreen                13   /* Deprecated */
+
+#define XF86DRINumberEvents		0
+
+#define XF86DRIClientNotLocal		0
+#define XF86DRIOperationNotSupported	1
+#define XF86DRINumberErrors		(XF86DRIOperationNotSupported + 1)
+
+#ifndef _XF86DRI_SERVER_
+
+_XFUNCPROTOBEGIN
+
+Bool XF86DRIQueryExtension( Display *dpy, int *event_base, int *error_base );
+
+Bool XF86DRIQueryVersion( Display *dpy, int *majorVersion, int *minorVersion,
+    int *patchVersion );
+
+Bool XF86DRIQueryDirectRenderingCapable( Display *dpy, int screen,
+    Bool *isCapable );
+
+Bool XF86DRIOpenConnection( Display *dpy, int screen, drm_handle_t *hSAREA,
+    char **busIDString );
+
+Bool XF86DRIAuthConnection( Display *dpy, int screen, drm_magic_t magic );
+
+Bool XF86DRICloseConnection( Display *dpy, int screen );
+
+Bool XF86DRIGetClientDriverName( Display *dpy, int screen,
+    int *ddxDriverMajorVersion, int *ddxDriverMinorVersion,
+    int *ddxDriverPatchVersion, char **clientDriverName );
+
+Bool XF86DRICreateContext( Display *dpy, int screen, Visual *visual,
+    XID *ptr_to_returned_context_id, drm_context_t *hHWContext );
+
+Bool XF86DRICreateContextWithConfig( Display *dpy, int screen, int configID,
+    XID *ptr_to_returned_context_id, drm_context_t *hHWContext );
+
+Bool XF86DRIDestroyContext( Display *dpy, int screen,
+    XID context_id );
+
+Bool XF86DRICreateDrawable( Display *dpy, int screen,
+    Drawable drawable, drm_drawable_t *hHWDrawable );
+
+Bool XF86DRIDestroyDrawable( Display *dpy, int screen, 
+    Drawable drawable);
+
+Bool XF86DRIGetDrawableInfo( Display *dpy, int screen, Drawable drawable,
+    unsigned int *index, unsigned int *stamp, 
+    int *X, int *Y, int *W, int *H,
+    int *numClipRects, drm_clip_rect_t ** pClipRects,
+    int *backX, int *backY,
+    int *numBackClipRects, drm_clip_rect_t **pBackClipRects );
+
+Bool XF86DRIGetDeviceInfo( Display *dpy, int screen,
+    drm_handle_t *hFrameBuffer, int *fbOrigin, int *fbSize,
+    int *fbStride, int *devPrivateSize, void **pDevPrivate );
+
+_XFUNCPROTOEND
+
+#endif /* _XF86DRI_SERVER_ */
+
+#endif /* _XF86DRI_H_ */
+
diff --git a/src/gallium/winsys/g3dvl/dri/xf86dristr.h b/src/gallium/winsys/g3dvl/dri/xf86dristr.h
new file mode 100644
index 00000000000..b834bd1a1a0
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/dri/xf86dristr.h
@@ -0,0 +1,342 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+Copyright 2000 VA Linux Systems, Inc.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E. Martin <martin@valinux.com>
+ *   Jens Owen <jens@tungstengraphics.com>
+ *   Rickard E. (Rik) Faith <faith@valinux.com>
+ *
+ */
+
+#ifndef _XF86DRISTR_H_
+#define _XF86DRISTR_H_
+
+#include "xf86dri.h"
+
+#define XF86DRINAME "XFree86-DRI"
+
+/* The DRI version number.  This was originally set to be the same as the
+ * XFree86 version number.  However, this version is really independent of
+ * the XFree86 version.
+ *
+ * Version History:
+ *    4.0.0: Original
+ *    4.0.1: Patch to bump clipstamp when windows are destroyed, 28 May 02
+ *    4.1.0: Add transition from single to multi in DRMInfo rec, 24 Jun 02
+ */
+#define XF86DRI_MAJOR_VERSION	4
+#define XF86DRI_MINOR_VERSION	1
+#define XF86DRI_PATCH_VERSION	0
+
+typedef struct _XF86DRIQueryVersion {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRIQueryVersion */
+    CARD16	length B16;
+} xXF86DRIQueryVersionReq;
+#define sz_xXF86DRIQueryVersionReq	4
+
+typedef struct {
+    BYTE	type;			/* X_Reply */
+    BOOL	pad1;
+    CARD16	sequenceNumber B16;
+    CARD32	length B32;
+    CARD16	majorVersion B16;	/* major version of DRI protocol */
+    CARD16	minorVersion B16;	/* minor version of DRI protocol */
+    CARD32	patchVersion B32;       /* patch version of DRI protocol */
+    CARD32	pad3 B32;
+    CARD32	pad4 B32;
+    CARD32	pad5 B32;
+    CARD32	pad6 B32;
+} xXF86DRIQueryVersionReply;
+#define sz_xXF86DRIQueryVersionReply	32
+
+typedef struct _XF86DRIQueryDirectRenderingCapable {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* X_DRIQueryDirectRenderingCapable */
+    CARD16	length B16;
+    CARD32	screen B32;
+} xXF86DRIQueryDirectRenderingCapableReq;
+#define sz_xXF86DRIQueryDirectRenderingCapableReq	8
+
+typedef struct {
+    BYTE	type;			/* X_Reply */
+    BOOL	pad1;
+    CARD16	sequenceNumber B16;
+    CARD32	length B32;
+    BOOL	isCapable;
+    BOOL	pad2;
+    BOOL	pad3;
+    BOOL	pad4;
+    CARD32	pad5 B32;
+    CARD32	pad6 B32;
+    CARD32	pad7 B32;
+    CARD32	pad8 B32;
+    CARD32	pad9 B32;
+} xXF86DRIQueryDirectRenderingCapableReply;
+#define sz_xXF86DRIQueryDirectRenderingCapableReply	32
+
+typedef struct _XF86DRIOpenConnection {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRIOpenConnection */
+    CARD16	length B16;
+    CARD32	screen B32;
+} xXF86DRIOpenConnectionReq;
+#define sz_xXF86DRIOpenConnectionReq	8
+
+typedef struct {
+    BYTE	type;			/* X_Reply */
+    BOOL	pad1;
+    CARD16	sequenceNumber B16;
+    CARD32	length B32;
+    CARD32	hSAREALow B32;
+    CARD32	hSAREAHigh B32;
+    CARD32	busIdStringLength B32;
+    CARD32	pad6 B32;
+    CARD32	pad7 B32;
+    CARD32	pad8 B32;
+} xXF86DRIOpenConnectionReply;
+#define sz_xXF86DRIOpenConnectionReply	32
+
+typedef struct _XF86DRIAuthConnection {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRIAuthConnection */
+    CARD16	length B16;
+    CARD32	screen B32;
+    CARD32      magic B32;
+} xXF86DRIAuthConnectionReq;
+#define sz_xXF86DRIAuthConnectionReq	12
+
+typedef struct {
+    BYTE        type;
+    BOOL        pad1;
+    CARD16      sequenceNumber B16;
+    CARD32      length B32;
+    CARD32      authenticated B32;
+    CARD32      pad2 B32;
+    CARD32      pad3 B32;
+    CARD32      pad4 B32;
+    CARD32      pad5 B32;
+    CARD32      pad6 B32;
+} xXF86DRIAuthConnectionReply;
+#define zx_xXF86DRIAuthConnectionReply  32
+
+typedef struct _XF86DRICloseConnection {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRICloseConnection */
+    CARD16	length B16;
+    CARD32	screen B32;
+} xXF86DRICloseConnectionReq;
+#define sz_xXF86DRICloseConnectionReq	8
+
+typedef struct _XF86DRIGetClientDriverName {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRIGetClientDriverName */
+    CARD16	length B16;
+    CARD32	screen B32;
+} xXF86DRIGetClientDriverNameReq;
+#define sz_xXF86DRIGetClientDriverNameReq	8
+
+typedef struct {
+    BYTE	type;			/* X_Reply */
+    BOOL	pad1;
+    CARD16	sequenceNumber B16;
+    CARD32	length B32;
+    CARD32	ddxDriverMajorVersion B32;
+    CARD32	ddxDriverMinorVersion B32;
+    CARD32	ddxDriverPatchVersion B32;
+    CARD32	clientDriverNameLength B32;
+    CARD32	pad5 B32;
+    CARD32	pad6 B32;
+} xXF86DRIGetClientDriverNameReply;
+#define sz_xXF86DRIGetClientDriverNameReply	32
+
+typedef struct _XF86DRICreateContext {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRICreateContext */
+    CARD16	length B16;
+    CARD32	screen B32;
+    CARD32	visual B32;
+    CARD32	context B32;
+} xXF86DRICreateContextReq;
+#define sz_xXF86DRICreateContextReq	16
+
+typedef struct {
+    BYTE	type;			/* X_Reply */
+    BOOL	pad1;
+    CARD16	sequenceNumber B16;
+    CARD32	length B32;
+    CARD32	hHWContext B32;
+    CARD32	pad2 B32;
+    CARD32	pad3 B32;
+    CARD32	pad4 B32;
+    CARD32	pad5 B32;
+    CARD32	pad6 B32;
+} xXF86DRICreateContextReply;
+#define sz_xXF86DRICreateContextReply	32
+
+typedef struct _XF86DRIDestroyContext {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRIDestroyContext */
+    CARD16	length B16;
+    CARD32	screen B32;
+    CARD32	context B32;
+} xXF86DRIDestroyContextReq;
+#define sz_xXF86DRIDestroyContextReq	12
+
+typedef struct _XF86DRICreateDrawable {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRICreateDrawable */
+    CARD16	length B16;
+    CARD32	screen B32;
+    CARD32	drawable B32;
+} xXF86DRICreateDrawableReq;
+#define sz_xXF86DRICreateDrawableReq	12
+
+typedef struct {
+    BYTE	type;			/* X_Reply */
+    BOOL	pad1;
+    CARD16	sequenceNumber B16;
+    CARD32	length B32;
+    CARD32	hHWDrawable B32;
+    CARD32	pad2 B32;
+    CARD32	pad3 B32;
+    CARD32	pad4 B32;
+    CARD32	pad5 B32;
+    CARD32	pad6 B32;
+} xXF86DRICreateDrawableReply;
+#define sz_xXF86DRICreateDrawableReply	32
+
+typedef struct _XF86DRIDestroyDrawable {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRIDestroyDrawable */
+    CARD16	length B16;
+    CARD32	screen B32;
+    CARD32	drawable B32;
+} xXF86DRIDestroyDrawableReq;
+#define sz_xXF86DRIDestroyDrawableReq	12
+
+typedef struct _XF86DRIGetDrawableInfo {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRIGetDrawableInfo */
+    CARD16	length B16;
+    CARD32	screen B32;
+    CARD32	drawable B32;
+} xXF86DRIGetDrawableInfoReq;
+#define sz_xXF86DRIGetDrawableInfoReq	12
+
+typedef struct {
+    BYTE	type;			/* X_Reply */
+    BOOL	pad1;
+    CARD16	sequenceNumber B16;
+    CARD32	length B32;
+    CARD32	drawableTableIndex B32;
+    CARD32	drawableTableStamp B32;
+    INT16	drawableX B16;
+    INT16	drawableY B16;
+    INT16	drawableWidth B16;
+    INT16	drawableHeight B16;
+    CARD32	numClipRects B32;
+    INT16       backX B16;
+    INT16       backY B16;
+    CARD32      numBackClipRects B32;
+} xXF86DRIGetDrawableInfoReply;
+
+#define sz_xXF86DRIGetDrawableInfoReply	36
+
+
+typedef struct _XF86DRIGetDeviceInfo {
+    CARD8	reqType;		/* always DRIReqCode */
+    CARD8	driReqType;		/* always X_DRIGetDeviceInfo */
+    CARD16	length B16;
+    CARD32	screen B32;
+} xXF86DRIGetDeviceInfoReq;
+#define sz_xXF86DRIGetDeviceInfoReq	8
+
+typedef struct {
+    BYTE	type;			/* X_Reply */
+    BOOL	pad1;
+    CARD16	sequenceNumber B16;
+    CARD32	length B32;
+    CARD32	hFrameBufferLow B32;
+    CARD32	hFrameBufferHigh B32;
+    CARD32	framebufferOrigin B32;
+    CARD32	framebufferSize B32;
+    CARD32	framebufferStride B32;
+    CARD32	devPrivateSize B32;
+} xXF86DRIGetDeviceInfoReply;
+#define sz_xXF86DRIGetDeviceInfoReply	32
+
+typedef struct _XF86DRIOpenFullScreen {
+    CARD8       reqType;	/* always DRIReqCode */
+    CARD8       driReqType;	/* always X_DRIOpenFullScreen */
+    CARD16      length B16;
+    CARD32      screen B32;
+    CARD32      drawable B32;
+} xXF86DRIOpenFullScreenReq;
+#define sz_xXF86DRIOpenFullScreenReq    12
+
+typedef struct {
+    BYTE        type;
+    BOOL        pad1;
+    CARD16      sequenceNumber B16;
+    CARD32      length B32;
+    CARD32      isFullScreen B32;
+    CARD32      pad2 B32;
+    CARD32      pad3 B32;
+    CARD32      pad4 B32;
+    CARD32      pad5 B32;
+    CARD32      pad6 B32;
+} xXF86DRIOpenFullScreenReply;
+#define sz_xXF86DRIOpenFullScreenReply  32
+
+typedef struct _XF86DRICloseFullScreen {
+    CARD8       reqType;	/* always DRIReqCode */
+    CARD8       driReqType;	/* always X_DRICloseFullScreen */
+    CARD16      length B16;
+    CARD32      screen B32;
+    CARD32      drawable B32;
+} xXF86DRICloseFullScreenReq;
+#define sz_xXF86DRICloseFullScreenReq   12
+
+typedef struct {
+    BYTE        type;
+    BOOL        pad1;
+    CARD16      sequenceNumber B16;
+    CARD32      length B32;
+    CARD32      pad2 B32;
+    CARD32      pad3 B32;
+    CARD32      pad4 B32;
+    CARD32      pad5 B32;
+    CARD32      pad6 B32;
+    CARD32      pad7 B32;
+} xXF86DRICloseFullScreenReply;
+#define sz_xXF86DRICloseFullScreenReply  32
+
+
+#endif /* _XF86DRISTR_H_ */
diff --git a/src/gallium/winsys/g3dvl/drm/Makefile b/src/gallium/winsys/g3dvl/drm/Makefile
new file mode 100644
index 00000000000..0711f44d8e3
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/drm/Makefile
@@ -0,0 +1,12 @@
+# src/gallium/winsys/g3dvl/drm/Makefile
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+SUBDIRS = $(GALLIUM_WINSYS_DRM_DIRS)
+
+default install clean:
+	@for dir in $(SUBDIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE) $@) || exit 1; \
+		fi \
+	done
diff --git a/src/gallium/winsys/g3dvl/drm/Makefile.template b/src/gallium/winsys/g3dvl/drm/Makefile.template
new file mode 100644
index 00000000000..a33d95677c5
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/drm/Makefile.template
@@ -0,0 +1,72 @@
+XVMC_MAJOR = 1
+XVMC_MINOR = 0
+XVMC_LIB = XvMCg3dvl
+XVMC_LIB_NAME = lib$(XVMC_LIB).so
+XVMC_LIB_DEPS = $(EXTRA_LIB_PATH) -lXvMC -lXv -lX11 -lm
+
+###############################################################
+
+INCLUDES = $(DRIVER_INCLUDES) \
+           -I$(TOP)/src/gallium/include \
+           -I$(TOP)/src/gallium/auxiliary \
+           -I$(TOP)/src/gallium/drivers \
+           -I$(TOP)/src/gallium/winsys/g3dvl \
+           -I$(TOP)/src/gallium/winsys/g3dvl/dri
+
+DEFINES += $(DRIVER_DEFINES) \
+           -DGALLIUM_SOFTPIPE \
+	   -DGALLIUM_TRACE
+
+# XXX: Hack, if we include libxvmctracker.a in LIBS none of the symbols are
+# pulled in by the linker because xsp_winsys.c doesn't refer to them
+OBJECTS = $(C_SOURCES:.c=.o) $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o
+
+LIBS = $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
+       $(PIPE_DRIVERS) \
+       $(TOP)/src/gallium/auxiliary/vl/libvl.a \
+       $(TOP)/src/gallium/auxiliary/draw/libdraw.a \
+       $(TOP)/src/gallium/auxiliary/tgsi/libtgsi.a \
+       $(TOP)/src/gallium/auxiliary/translate/libtranslate.a \
+       $(TOP)/src/gallium/auxiliary/cso_cache/libcso_cache.a \
+       $(TOP)/src/gallium/auxiliary/rtasm/librtasm.a \
+       $(TOP)/src/gallium/auxiliary/util/libutil.a
+
+.c.o:
+	$(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
+
+.S.o:
+	$(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
+
+.PHONY: default $(TOP)/$(LIB_DIR)/gallium clean
+
+default: depend $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME)
+
+$(TOP)/$(LIB_DIR)/gallium:
+	@mkdir -p $(TOP)/$(LIB_DIR)/gallium
+
+# Make the libXvMCg3dvl.so library
+$(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME): $(OBJECTS) $(LIBS) Makefile
+	$(MKLIB) -o $(XVMC_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+		-major $(XVMC_MAJOR) -minor $(XVMC_MINOR) $(MKLIB_OPTIONS) \
+		-install $(TOP)/$(LIB_DIR)/gallium -id $(INSTALL_LIB_DIR)/lib$(XVMC_LIB).1.dylib \
+		$(XVMC_LIB_DEPS) $(DRIVER_LIB_DEPS) $(OBJECTS) $(LIBS)
+
+depend: $(C_SOURCES) Makefile
+	$(RM) depend
+	touch depend
+	$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDES) $(C_SOURCES)
+
+#install: default
+#	$(INSTALL) -d $(INSTALL_DIR)/include/GL
+#	$(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR)
+#	$(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL
+#	@if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \
+#		$(INSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \
+#	fi
+
+clean: Makefile
+	$(RM) $(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME)
+	$(RM) *.o *~
+	$(RM) depend depend.bak
+
+-include depend
diff --git a/src/gallium/winsys/g3dvl/drm/nouveau/Makefile b/src/gallium/winsys/g3dvl/drm/nouveau/Makefile
new file mode 100644
index 00000000000..7ff448421a6
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/drm/nouveau/Makefile
@@ -0,0 +1,26 @@
+# This makefile produces a libXvMCg3dvl.so which is
+# based on DRM/DRI
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+C_SOURCES =
+
+DRIVER_INCLUDES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-I) \
+                   -I$(TOP)/src/gallium/winsys/drm/nouveau
+DRIVER_DEFINES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-other)
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \
+	$(TOP)/src/gallium/drivers/nv04/libnv04.a \
+	$(TOP)/src/gallium/drivers/nv10/libnv10.a \
+	$(TOP)/src/gallium/drivers/nv20/libnv20.a \
+	$(TOP)/src/gallium/drivers/nv30/libnv30.a \
+	$(TOP)/src/gallium/drivers/nv40/libnv40.a \
+	$(TOP)/src/gallium/drivers/nv50/libnv50.a \
+	$(TOP)/src/gallium/drivers/nouveau/libnouveau.a \
+        $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a
+
+DRIVER_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs)
+
+include ../Makefile.template
diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile
deleted file mode 100644
index 4eba18a0d5c..00000000000
--- a/src/gallium/winsys/g3dvl/nouveau/Makefile
+++ /dev/null
@@ -1,98 +0,0 @@
-# This makefile produces a libXvMCg3dvl.so which is
-# based on DRM/DRI
-
-TOP = ../../../../..
-include $(TOP)/configs/current
-
-XVMC_MAJOR = 1
-XVMC_MINOR = 0
-XVMC_LIB = XvMCg3dvl
-XVMC_LIB_NAME = lib$(XVMC_LIB).so
-XVMC_LIB_DEPS = $(EXTRA_LIB_PATH) -lXvMC -lXv -lX11 -lm
-
-SOURCES = drm_nouveau_winsys.c
-
-DRIVER_INCLUDES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-I) \
-                   -I$(TOP)/src/gallium/winsys/drm/nouveau \
-                   -I$(TOP)/src/driclient/include
-DRIVER_DEFINES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-other)
-
-PIPE_DRIVERS = \
-	$(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \
-	$(TOP)/src/gallium/drivers/nv04/libnv04.a \
-	$(TOP)/src/gallium/drivers/nv10/libnv10.a \
-	$(TOP)/src/gallium/drivers/nv20/libnv20.a \
-	$(TOP)/src/gallium/drivers/nv30/libnv30.a \
-	$(TOP)/src/gallium/drivers/nv40/libnv40.a \
-	$(TOP)/src/gallium/drivers/nv50/libnv50.a \
-	$(TOP)/src/gallium/drivers/nouveau/libnouveau.a
-
-DRI_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs)
-
-###############################################################
-
-INCLUDES = $(DRIVER_INCLUDES) \
-           -I$(TOP)/src/gallium/include \
-           -I$(TOP)/src/gallium/auxiliary \
-           -I$(TOP)/src/gallium/drivers \
-           -I$(TOP)/src/gallium/winsys/g3dvl \
-           -I$(TOP)/src/driclient/include
-
-DEFINES += $(DRIVER_DEFINES) \
-           -DGALLIUM_SOFTPIPE \
-	   -DGALLIUM_TRACE
-
-# XXX: Hack, if we include libxvmctracker.a in LIBS none of the symbols are
-# pulled in by the linker because xsp_winsys.c doesn't refer to them
-OBJECTS = $(SOURCES:.c=.o) $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o
-
-LIBS = $(PIPE_DRIVERS) \
-       $(TOP)/src/driclient/lib/libdriclient.a \
-       $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
-       $(TOP)/src/gallium/auxiliary/vl/libvl.a \
-       $(TOP)/src/gallium/auxiliary/draw/libdraw.a \
-       $(TOP)/src/gallium/auxiliary/tgsi/libtgsi.a \
-       $(TOP)/src/gallium/auxiliary/translate/libtranslate.a \
-       $(TOP)/src/gallium/auxiliary/cso_cache/libcso_cache.a \
-       $(TOP)/src/gallium/auxiliary/rtasm/librtasm.a \
-       $(TOP)/src/gallium/auxiliary/util/libutil.a
-
-.c.o:
-	$(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
-
-.S.o:
-	$(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
-
-.PHONY: default $(TOP)/$(LIB_DIR)/gallium clean
-
-default: depend $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME)
-
-$(TOP)/$(LIB_DIR)/gallium:
-	@mkdir -p $(TOP)/$(LIB_DIR)/gallium
-
-# Make the libXvMCg3dvl.so library
-$(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME): $(OBJECTS) $(LIBS) Makefile
-	$(MKLIB) -o $(XVMC_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
-		-major $(XVMC_MAJOR) -minor $(XVMC_MINOR) $(MKLIB_OPTIONS) \
-		-install $(TOP)/$(LIB_DIR)/gallium -id $(INSTALL_LIB_DIR)/lib$(XVMC_LIB).1.dylib \
-		$(XVMC_LIB_DEPS) $(DRI_LIB_DEPS) $(OBJECTS) $(LIBS)
-
-depend: $(SOURCES) Makefile
-	$(RM) depend
-	touch depend
-	$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDES) $(SOURCES)
-
-#install: default
-#	$(INSTALL) -d $(INSTALL_DIR)/include/GL
-#	$(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR)
-#	$(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL
-#	@if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \
-#		$(INSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \
-#	fi
-
-clean: Makefile
-	$(RM) $(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME)
-	$(RM) *.o *~
-	$(RM) depend depend.bak
-
--include depend
diff --git a/src/gallium/winsys/g3dvl/nouveau/drm_nouveau_winsys.c b/src/gallium/winsys/g3dvl/nouveau/drm_nouveau_winsys.c
deleted file mode 100644
index 257aa0a1201..00000000000
--- a/src/gallium/winsys/g3dvl/nouveau/drm_nouveau_winsys.c
+++ /dev/null
@@ -1,393 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include <vl_winsys.h>
-#include <driclient.h>
-#include <state_tracker/dri1_api.h>
-#include <pipe/p_video_context.h>
-#include <pipe/p_state.h>
-#include <util/u_memory.h>
-
-struct vl_dri_screen
-{
-   struct vl_screen base;
-   Visual *visual;
-   struct drm_api *api;
-   dri_screen_t *dri_screen;
-   dri_framebuffer_t dri_framebuf;
-   struct dri1_api *api_hooks;
-};
-
-struct vl_dri_context
-{
-   struct vl_context base;
-   boolean is_locked;
-   boolean lost_lock;
-   drmLock *lock;
-   dri_context_t *dri_context;
-   int fd;
-   struct pipe_video_context *vpipe;
-   dri_drawable_t *drawable;
-};
-
-static void
-vl_dri_lock(void *priv)
-{
-   struct vl_dri_context *vl_dri_ctx = priv;
-   drm_context_t hw_context;
-   char ret = 0;
-
-   assert(priv);
-
-   hw_context = vl_dri_ctx->dri_context->drm_context;
-
-   DRM_CAS(vl_dri_ctx->lock, hw_context, DRM_LOCK_HELD | hw_context, ret);
-   if (ret) {
-      drmGetLock(vl_dri_ctx->fd, hw_context, 0);
-      vl_dri_ctx->lost_lock = TRUE;
-   }
-   vl_dri_ctx->is_locked = TRUE;
-}
-
-static void
-vl_dri_unlock(void *priv)
-{
-   struct vl_dri_context *vl_dri_ctx = priv;
-   drm_context_t hw_context;
-
-   assert(priv);
-
-   hw_context = vl_dri_ctx->dri_context->drm_context;
-
-   vl_dri_ctx->is_locked = FALSE;
-   DRM_UNLOCK(vl_dri_ctx->fd, vl_dri_ctx->lock, hw_context);
-}
-
-static boolean
-vl_dri_is_locked(void *priv)
-{
-   struct vl_dri_context *vl_dri_ctx = priv;
-
-   assert(priv);
-
-   return vl_dri_ctx->is_locked;
-}
-
-static boolean
-vl_dri_lost_lock(void *priv)
-{
-   struct vl_dri_context *vl_dri_ctx = priv;
-
-   assert(priv);
-
-   return vl_dri_ctx->lost_lock;
-}
-
-static void
-vl_dri_clear_lost_lock(void *priv)
-{
-   struct vl_dri_context *vl_dri_ctx = priv;
-
-   assert(priv);
-
-   vl_dri_ctx->lost_lock = FALSE;
-}
-
-struct dri1_api_lock_funcs dri1_lf =
-{
-   .lock = vl_dri_lock,
-   .unlock = vl_dri_unlock,
-   .is_locked = vl_dri_is_locked,
-   .is_lock_lost = vl_dri_lost_lock,
-   .clear_lost_lock = vl_dri_clear_lost_lock
-};
-
-static void
-vl_dri_copy_version(struct dri1_api_version *dst, dri_version_t *src)
-{
-   assert(src);
-   assert(dst);
-   dst->major = src->major;
-   dst->minor = src->minor;
-   dst->patch_level = src->patch;
-}
-
-static boolean
-vl_dri_intersect_src_bbox(struct drm_clip_rect *dst, int dst_x, int dst_y,
-                          const struct drm_clip_rect *src, const struct drm_clip_rect *bbox)
-{
-   int xy1;
-   int xy2;
-
-   assert(dst);
-   assert(src);
-   assert(bbox);
-
-   xy1 = ((int)src->x1 > (int)bbox->x1 + dst_x) ? src->x1 :
-      (int)bbox->x1 + dst_x;
-   xy2 = ((int)src->x2 < (int)bbox->x2 + dst_x) ? src->x2 :
-      (int)bbox->x2 + dst_x;
-   if (xy1 >= xy2 || xy1 < 0)
-      return FALSE;
-
-   dst->x1 = xy1;
-   dst->x2 = xy2;
-
-   xy1 = ((int)src->y1 > (int)bbox->y1 + dst_y) ? src->y1 :
-      (int)bbox->y1 + dst_y;
-   xy2 = ((int)src->y2 < (int)bbox->y2 + dst_y) ? src->y2 :
-      (int)bbox->y2 + dst_y;
-   if (xy1 >= xy2 || xy1 < 0)
-      return FALSE;
-
-   dst->y1 = xy1;
-   dst->y2 = xy2;
-   return TRUE;
-}
-
-static void
-vl_clip_copy(struct vl_dri_context *vl_dri_ctx,
-	     struct pipe_surface *dst,
-	     struct pipe_surface *src,
-	     const struct drm_clip_rect *src_bbox)
-{
-   struct pipe_video_context *vpipe = vl_dri_ctx->base.vpipe;
-   struct drm_clip_rect clip;
-   struct drm_clip_rect *cur;
-   int i;
-
-   assert(vl_dri_ctx);
-   assert(dst);
-   assert(src);
-   assert(src_bbox);
-
-   assert(vl_dri_ctx->drawable->cliprects);
-   assert(vl_dri_ctx->drawable->num_cliprects > 0);
-
-   cur = vl_dri_ctx->drawable->cliprects;
-
-   for (i = 0; i < vl_dri_ctx->drawable->num_cliprects; ++i) {
-      if (vl_dri_intersect_src_bbox(&clip, vl_dri_ctx->drawable->x, vl_dri_ctx->drawable->y, cur++, src_bbox))
-         vpipe->surface_copy
-         (
-            vpipe, dst, clip.x1, clip.y1, src,
-            (int)clip.x1 - vl_dri_ctx->drawable->x,
-            (int)clip.y1 - vl_dri_ctx->drawable->y,
-            clip.x2 - clip.x1, clip.y2 - clip.y1
-         );
-   }
-}
-
-static void
-vl_dri_update_drawables_locked(struct vl_dri_context *vl_dri_ctx)
-{
-   struct vl_dri_screen *vl_dri_scrn;
-
-   assert(vl_dri_ctx);
-
-   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
-
-   if (vl_dri_ctx->lost_lock) {
-      vl_dri_ctx->lost_lock = FALSE;
-      DRI_VALIDATE_DRAWABLE_INFO(vl_dri_scrn->dri_screen, vl_dri_ctx->drawable);
-   }
-}
-
-static void
-vl_dri_flush_frontbuffer(struct pipe_screen *screen,
-                         struct pipe_surface *surf, void *context_private)
-{
-   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)context_private;
-   struct vl_dri_screen *vl_dri_scrn;
-   struct drm_clip_rect src_bbox;
-   boolean save_lost_lock = FALSE;
-
-   assert(screen);
-   assert(surf);
-   assert(context_private);
-
-   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
-
-   vl_dri_lock(vl_dri_ctx);
-
-   save_lost_lock = vl_dri_ctx->lost_lock;
-
-   vl_dri_update_drawables_locked(vl_dri_ctx);
-
-   src_bbox.x1 = 0;
-   src_bbox.x2 = vl_dri_ctx->drawable->w;
-   src_bbox.y1 = 0;
-   src_bbox.y2 = vl_dri_ctx->drawable->h;
-
-#if 0
-   if (vl_dri_scrn->_api_hooks->present_locked)
-      vl_dri_scrn->api_hooks->present_locked(pipe, surf,
-                                             vl_dri_ctx->drawable->cliprects,
-                                             vl_dri_ctx->drawable->num_cliprects,
-                                             vl_dri_ctx->drawable->x, vl_dri_drawable->y,
-                                             &bbox, NULL /*fence*/);
-   else
-#endif
-   if (vl_dri_scrn->api_hooks->front_srf_locked) {
-      struct pipe_surface *front = vl_dri_scrn->api_hooks->front_srf_locked(screen);
-
-      if (front)
-         vl_clip_copy(vl_dri_ctx, front, surf, &src_bbox);
-
-      //st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, fence);
-   }
-
-   vl_dri_ctx->lost_lock = save_lost_lock;
-
-   vl_dri_unlock(vl_dri_ctx);
-}
-
-Drawable
-vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable)
-{
-   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)vctx;
-   struct vl_dri_screen *vl_dri_scrn;
-   dri_drawable_t *dri_drawable;
-   Drawable old_drawable = None;
-
-   assert(vctx);
-
-   if (vl_dri_ctx->drawable)
-      old_drawable = vl_dri_ctx->drawable->x_drawable;
-
-   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
-   driCreateDrawable(vl_dri_scrn->dri_screen, drawable, &dri_drawable);
-   vl_dri_ctx->drawable = dri_drawable;
-
-   return old_drawable;
-}
-
-struct vl_screen*
-vl_screen_create(Display *display, int screen)
-{
-   struct vl_dri_screen *vl_dri_scrn;
-   struct dri1_create_screen_arg arg;
-
-   assert(display);
-
-   vl_dri_scrn = CALLOC_STRUCT(vl_dri_screen);
-   if (!vl_dri_scrn)
-      return NULL;
-
-   driCreateScreen(display, screen, &vl_dri_scrn->dri_screen, &vl_dri_scrn->dri_framebuf);
-   vl_dri_scrn->api = drm_api_create();
-
-   arg.base.mode = DRM_CREATE_DRI1;
-   arg.lf = &dri1_lf;
-   arg.ddx_info = vl_dri_scrn->dri_framebuf.private;
-   arg.ddx_info_size = vl_dri_scrn->dri_framebuf.private_size;
-   arg.sarea = vl_dri_scrn->dri_screen->sarea;
-   vl_dri_copy_version(&arg.ddx_version, &vl_dri_scrn->dri_screen->ddx);
-   vl_dri_copy_version(&arg.dri_version, &vl_dri_scrn->dri_screen->dri);
-   vl_dri_copy_version(&arg.drm_version, &vl_dri_scrn->dri_screen->drm);
-   arg.api = NULL;
-
-   vl_dri_scrn->base.pscreen = vl_dri_scrn->api->create_screen(vl_dri_scrn->api,
-                                                               vl_dri_scrn->dri_screen->fd,
-                                                               &arg.base);
-
-   if (!vl_dri_scrn->base.pscreen) {
-      FREE(vl_dri_scrn);
-      return NULL;
-   }
-
-   vl_dri_scrn->visual = XDefaultVisual(display, screen);
-   vl_dri_scrn->api_hooks = arg.api;
-   vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri_flush_frontbuffer;
-   /* XXX: Safe to call this while unlocked? */
-   vl_dri_scrn->base.format = vl_dri_scrn->api_hooks->front_srf_locked(vl_dri_scrn->base.pscreen)->format;
-
-   return &vl_dri_scrn->base;
-}
-
-void vl_screen_destroy(struct vl_screen *vscreen)
-{
-   struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vscreen;
-
-   assert(vscreen);
-
-   vl_dri_scrn->base.pscreen->destroy(vl_dri_scrn->base.pscreen);
-   driDestroyScreen(vl_dri_scrn->dri_screen);
-   FREE(vl_dri_scrn);
-}
-
-struct vl_context*
-vl_video_create(struct vl_screen *vscreen,
-                enum pipe_video_profile profile,
-                enum pipe_video_chroma_format chroma_format,
-                unsigned width, unsigned height)
-{
-   struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vscreen;
-   struct vl_dri_context *vl_dri_ctx;
-
-   vl_dri_ctx = CALLOC_STRUCT(vl_dri_context);
-   if (!vl_dri_ctx)
-      return NULL;
-
-   /* XXX: Is default visual correct/sufficient here? */
-   driCreateContext(vl_dri_scrn->dri_screen, vl_dri_scrn->visual, &vl_dri_ctx->dri_context);
-
-   if (!vl_dri_scrn->api->create_video_context) {
-      debug_printf("[G3DVL] No video support found on %s/%s.\n",
-                   vl_dri_scrn->base.pscreen->get_vendor(vl_dri_scrn->base.pscreen),
-                   vl_dri_scrn->base.pscreen->get_name(vl_dri_scrn->base.pscreen));
-      FREE(vl_dri_ctx);
-      return NULL;
-   }
-
-   vl_dri_ctx->base.vpipe = vl_dri_scrn->api->create_video_context(vl_dri_scrn->api,
-                                                                   vscreen->pscreen,
-                                                                   profile, chroma_format,
-                                                                   width, height);
-
-   if (!vl_dri_ctx->base.vpipe) {
-      FREE(vl_dri_ctx);
-      return NULL;
-   }
-
-   vl_dri_ctx->base.vpipe->priv = vl_dri_ctx;
-   vl_dri_ctx->base.vscreen = vscreen;
-   vl_dri_ctx->fd = vl_dri_scrn->dri_screen->fd;
-   vl_dri_ctx->lock = (drmLock*)&vl_dri_scrn->dri_screen->sarea->lock;
-
-   return &vl_dri_ctx->base;
-}
-
-void vl_video_destroy(struct vl_context *vctx)
-{
-   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)vctx;
-
-   assert(vctx);
-
-   vl_dri_ctx->base.vpipe->destroy(vl_dri_ctx->base.vpipe);
-   FREE(vl_dri_ctx);
-}
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c
deleted file mode 100644
index dfc4905bc03..00000000000
--- a/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c
+++ /dev/null
@@ -1,172 +0,0 @@
-#include "nouveau_context_vl.h"
-#include <pipe/p_defines.h>
-#include <pipe/p_context.h>
-#include <pipe/p_screen.h>
-#include <util/u_memory.h>
-#include <common/nouveau_dri.h>
-#include <common/nouveau_local.h>
-#include <common/nouveau_winsys_pipe.h>
-#include "nouveau_screen_vl.h"
-
-/*
-#ifdef DEBUG
-static const struct dri_debug_control debug_control[] = {
-	{ "bo", DEBUG_BO },
-	{ NULL, 0 }
-};
-int __nouveau_debug = 0;
-#endif
-*/
-
-int
-nouveau_context_create(dri_context_t *dri_context)
-{
-	dri_screen_t			*dri_screen;
-	struct nouveau_screen_vl	*nv_screen;
-	struct nouveau_context_vl	*nv;
-
-	assert (dri_context);
-
-	dri_screen = dri_context->dri_screen;
-	nv_screen = dri_screen->private;
-	nv = CALLOC_STRUCT(nouveau_context_vl);
-
-	if (!nv)
-		return 1;
-
-	if (nouveau_context_init(&nv_screen->base, dri_context->drm_context,
-	                        (drmLock*)&dri_screen->sarea->lock, NULL, &nv->base))
-	{
-		FREE(nv);
-		return 1;
-	}
-
-	dri_context->private = (void*)nv;
-	nv->dri_context = dri_context;
-	nv->nv_screen  = nv_screen;
-
-	/*
-	driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache,
-			    nv->dri_screen->myNum, "nouveau");
-#ifdef DEBUG
-	__nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"),
-					      debug_control);
-#endif
-	*/
-
-	nv->base.nvc->pctx[nv->base.pctx_id]->priv = nv;
-
-	return 0;
-}
-
-void
-nouveau_context_destroy(dri_context_t *dri_context)
-{
-	struct nouveau_context_vl *nv = dri_context->private;
-
-	assert(dri_context);
-
-	nouveau_context_cleanup(&nv->base);
-
-	FREE(nv);
-}
-
-int
-nouveau_context_bind(struct nouveau_context_vl *nv, dri_drawable_t *dri_drawable)
-{
-	assert(nv);
-	assert(dri_drawable);
-
-	if (nv->dri_drawable != dri_drawable)
-	{
-		nv->dri_drawable = dri_drawable;
-		dri_drawable->private = nv;
-	}
-
-	return 0;
-}
-
-int
-nouveau_context_unbind(struct nouveau_context_vl *nv)
-{
-	assert(nv);
-
-	nv->dri_drawable = NULL;
-
-	return 0;
-}
-
-/* Show starts here */
-
-int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable)
-{
-	struct nouveau_context_vl	*nv;
-	dri_drawable_t			*dri_drawable;
-
-	assert(pipe);
-
-	nv = pipe->priv;
-
-	driCreateDrawable(nv->nv_screen->dri_screen, drawable, &dri_drawable);
-
-	nouveau_context_bind(nv, dri_drawable);
-
-	return 0;
-}
-
-int unbind_pipe_drawable(struct pipe_context *pipe)
-{
-	assert (pipe);
-
-	nouveau_context_unbind(pipe->priv);
-
-	return 0;
-}
-
-struct pipe_context* create_pipe_context(Display *display, int screen)
-{
-	dri_screen_t			*dri_screen;
-	dri_framebuffer_t		dri_framebuf;
-	dri_context_t			*dri_context;
-	struct nouveau_context_vl	*nv;
-
-	assert(display);
-
-	driCreateScreen(display, screen, &dri_screen, &dri_framebuf);
-	driCreateContext(dri_screen, XDefaultVisual(display, screen), &dri_context);
-
-	nouveau_screen_create(dri_screen, &dri_framebuf);
-	nouveau_context_create(dri_context);
-
-	nv = dri_context->private;
-
-	return nv->base.nvc->pctx[nv->base.pctx_id];
-}
-
-int destroy_pipe_context(struct pipe_context *pipe)
-{
-	struct pipe_screen		*screen;
-	struct pipe_winsys		*winsys;
-	struct nouveau_context_vl	*nv;
-	dri_screen_t			*dri_screen;
-	dri_context_t			*dri_context;
-
-	assert(pipe);
-
-	screen = pipe->screen;
-	winsys = pipe->winsys;
-	nv = pipe->priv;
-	dri_context = nv->dri_context;
-	dri_screen = dri_context->dri_screen;
-
-	pipe->destroy(pipe);
-	screen->destroy(screen);
-	FREE(winsys);
-
-	nouveau_context_destroy(dri_context);
-	nouveau_screen_destroy(dri_screen);
-	driDestroyContext(dri_context);
-	driDestroyScreen(dri_screen);
-
-	return 0;
-}
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h
deleted file mode 100644
index 1115c3130cb..00000000000
--- a/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __NOUVEAU_CONTEXT_VL_H__
-#define __NOUVEAU_CONTEXT_VL_H__
-
-#include <driclient.h>
-#include <nouveau/nouveau_winsys.h>
-#include <common/nouveau_context.h>
-
-/*#include "xmlconfig.h"*/
-
-struct nouveau_context_vl {
-	struct nouveau_context		base;
-	struct nouveau_screen_vl	*nv_screen;
-	dri_context_t			*dri_context;
-	dri_drawable_t			*dri_drawable;
-	unsigned int			last_stamp;
-	/*driOptionCache		dri_option_cache;*/
-	drm_context_t			drm_context;
-	drmLock				drm_lock;
-};
-
-extern int nouveau_context_create(dri_context_t *);
-extern void nouveau_context_destroy(dri_context_t *);
-extern int nouveau_context_bind(struct nouveau_context_vl *, dri_drawable_t *);
-extern int nouveau_context_unbind(struct nouveau_context_vl *);
-
-#ifdef DEBUG
-extern int __nouveau_debug;
-
-#define DEBUG_BO (1 << 0)
-
-#define DBG(flag, ...) do {                   \
-	if (__nouveau_debug & (DEBUG_##flag)) \
-		NOUVEAU_ERR(__VA_ARGS__);     \
-} while(0)
-#else
-#define DBG(flag, ...)
-#endif
-
-#endif
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c
deleted file mode 100644
index b7c74f8299b..00000000000
--- a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c
+++ /dev/null
@@ -1,88 +0,0 @@
-#include "nouveau_screen_vl.h"
-#include <util/u_memory.h>
-#include <nouveau_drm.h>
-#include <common/nouveau_dri.h>
-#include <common/nouveau_local.h>
-
-#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 12
-#error nouveau_drm.h version does not match expected version
-#endif
-
-/*
-PUBLIC const char __driConfigOptions[] =
-DRI_CONF_BEGIN
-DRI_CONF_END;
-static const GLuint __driNConfigOptions = 0;
-*/
-
-int nouveau_check_dri_drm_ddx(dri_version_t *dri, dri_version_t *drm, dri_version_t *ddx)
-{
-	static const dri_version_t ddx_expected = {0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL};
-	static const dri_version_t dri_expected = {4, 0, 0};
-	static const dri_version_t drm_expected = {0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL};
-
-	assert(dri);
-	assert(drm);
-	assert(ddx);
-
-	if (dri->major != dri_expected.major || dri->minor < dri_expected.minor)
-	{
-		NOUVEAU_ERR("Unexpected DRI version.\n");
-		return 1;
-	}
-	if (drm->major != drm_expected.major || drm->minor < drm_expected.minor)
-	{
-		NOUVEAU_ERR("Unexpected DRM version.\n");
-		return 1;
-	}
-	if (ddx->major != ddx_expected.major || ddx->minor < ddx_expected.minor)
-	{
-		NOUVEAU_ERR("Unexpected DDX version.\n");
-		return 1;
-	}
-
-	return 0;
-}
-
-int
-nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_framebuf)
-{
-	struct nouveau_dri		*nv_dri = dri_framebuf->private;
-	struct nouveau_screen_vl	*nv_screen;
-
-	assert(dri_screen);
-	assert(dri_framebuf);
-
-	if (nouveau_check_dri_drm_ddx(&dri_screen->dri, &dri_screen->drm, &dri_screen->ddx))
-		return 1;
-
-	nv_screen = CALLOC_STRUCT(nouveau_screen_vl);
-
-	if (!nv_screen)
-		return 1;
-
-	if (nouveau_screen_init(nv_dri, dri_screen->fd, &nv_screen->base))
-	{
-		FREE(nv_screen);
-		return 1;
-	}
-
-	/*
-	driParseOptionInfo(&nv_screen->option_cache,
-			   __driConfigOptions, __driNConfigOptions);
-	*/
-
-	nv_screen->dri_screen = dri_screen;
-	dri_screen->private = (void*)nv_screen;
-
-	return 0;
-}
-
-void
-nouveau_screen_destroy(dri_screen_t *dri_screen)
-{
-	struct nouveau_screen_vl *nv_screen = dri_screen->private;
-
-	nouveau_screen_cleanup(&nv_screen->base);
-	FREE(nv_screen);
-}
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h
deleted file mode 100644
index 0c1ceca6dee..00000000000
--- a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef __NOUVEAU_SCREEN_VL_H__
-#define __NOUVEAU_SCREEN_VL_H__
-
-#include <driclient.h>
-#include <common/nouveau_screen.h>
-
-/* TODO: Investigate using DRI options for interesting things */
-/*#include "xmlconfig.h"*/
-
-struct nouveau_screen_vl
-{
-	struct nouveau_screen		base;
-	dri_screen_t			*dri_screen;
-	/*driOptionCache		option_cache;*/
-};
-
-int nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_framebuf);
-void nouveau_screen_destroy(dri_screen_t *dri_screen);
-
-#endif
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c
deleted file mode 100644
index 77e46a2054b..00000000000
--- a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c
+++ /dev/null
@@ -1,94 +0,0 @@
-#include <driclient.h>
-#include <common/nouveau_local.h>
-#include <common/nouveau_screen.h>
-#include "nouveau_context_vl.h"
-#include "nouveau_swapbuffers.h"
-
-void
-nouveau_copy_buffer(dri_drawable_t *dri_drawable, struct pipe_surface *surf,
-		    const drm_clip_rect_t *rect)
-{
-	struct nouveau_context_vl	*nv = dri_drawable->private;
-	struct pipe_context		*pipe = nv->base.nvc->pctx[nv->base.pctx_id];
-	drm_clip_rect_t			*pbox;
-	int				nbox, i;
-
-	LOCK_HARDWARE(&nv->base);
-	if (!dri_drawable->num_cliprects) {
-		UNLOCK_HARDWARE(&nv->base);
-		return;
-	}
-	pbox = dri_drawable->cliprects;
-	nbox = dri_drawable->num_cliprects;
-
-	for (i = 0; i < nbox; i++, pbox++) {
-		int sx, sy, dx, dy, w, h;
-
-		sx = pbox->x1 - dri_drawable->x;
-		sy = pbox->y1 - dri_drawable->y;
-		dx = pbox->x1;
-		dy = pbox->y1;
-		w  = pbox->x2 - pbox->x1;
-		h  = pbox->y2 - pbox->y1;
-
-		pipe->surface_copy(pipe, nv->base.frontbuffer,
-				   dx, dy, surf, sx, sy, w, h);
-	}
-
-	FIRE_RING(nv->base.nvc->channel);
-	UNLOCK_HARDWARE(&nv->base);
-}
-
-void
-nouveau_copy_sub_buffer(dri_drawable_t *dri_drawable, struct pipe_surface *surf, int x, int y, int w, int h)
-{
-	if (surf) {
-		drm_clip_rect_t rect;
-		rect.x1 = x;
-		rect.y1 = y;
-		rect.x2 = x + w;
-		rect.y2 = y + h;
-
-		nouveau_copy_buffer(dri_drawable, surf, &rect);
-	}
-}
-
-void
-nouveau_swap_buffers(dri_drawable_t *dri_drawable, struct pipe_surface *surf)
-{
-	if (surf)
-		nouveau_copy_buffer(dri_drawable, surf, NULL);
-}
-
-void
-nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf,
-			  void *context_private)
-{
-	struct nouveau_context_vl	*nv;
-	dri_drawable_t			*dri_drawable;
-
-	assert(pws);
-	assert(surf);
-	assert(context_private);
-
-	nv = context_private;
-	dri_drawable = nv->dri_drawable;
-
-	nouveau_copy_buffer(dri_drawable, surf, NULL);
-}
-
-void
-nouveau_contended_lock(struct nouveau_context *nv)
-{
-	struct nouveau_context_vl	*nv_vl = (struct nouveau_context_vl*)nv;
-	dri_drawable_t			*dri_drawable = nv_vl->dri_drawable;
-	dri_screen_t			*dri_screen = nv_vl->dri_context->dri_screen;
-
-	/* If the window moved, may need to set a new cliprect now.
-	 *
-	 * NOTE: This releases and regains the hw lock, so all state
-	 * checking must be done *after* this call:
-	 */
-	if (dri_drawable)
-		DRI_VALIDATE_DRAWABLE_INFO(dri_screen, dri_drawable);
-}
diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h
deleted file mode 100644
index 35e934adba8..00000000000
--- a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __NOUVEAU_SWAPBUFFERS_H__
-#define __NOUVEAU_SWAPBUFFERS_H__
-
-extern void nouveau_copy_buffer(dri_drawable_t *, struct pipe_surface *,
-				const drm_clip_rect_t *);
-extern void nouveau_copy_sub_buffer(dri_drawable_t *, struct pipe_surface *,
-				    int x, int y, int w, int h);
-extern void nouveau_swap_buffers(dri_drawable_t *, struct pipe_surface *);
-
-#endif
-- 
cgit v1.2.3


From 181d034ad53db3daec0512bd8410fc96d1a3f5b5 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Fri, 6 Nov 2009 01:09:24 -0500
Subject: g3dvl: Remove src/driclient

It's in src/gallium/winsys/g3dvl/dri now.
---
 src/driclient/src/Makefile | 19 -------------------
 1 file changed, 19 deletions(-)
 delete mode 100644 src/driclient/src/Makefile

diff --git a/src/driclient/src/Makefile b/src/driclient/src/Makefile
deleted file mode 100644
index 34435a2086e..00000000000
--- a/src/driclient/src/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-TARGET		= libdriclient.a
-OBJECTS		= driclient.o XF86dri.o
-DRMDIR		?= /usr
-
-CFLAGS		+= -g -Wall -fPIC -I../include -I${DRMDIR}/include -I${DRMDIR}/include/drm
-
-#############################################
-
-.PHONY	= all clean
-
-all: ${TARGET}
-
-${TARGET}: ${OBJECTS}
-	ar rcs $@ $^
-	if ! test -d ../lib; then mkdir ../lib; fi
-	cp ${TARGET} ../lib
-
-clean:
-	rm -rf ${OBJECTS} ${TARGET} ../lib/${TARGET}
-- 
cgit v1.2.3


From 09878fb91a9eeac83056ef9907b09d916a8c99f4 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 7 Nov 2009 19:45:47 -0500
Subject: g3dvl: Unbreak debug build.

---
 src/gallium/drivers/softpipe/sp_video_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index e6c8affae92..f7231dc3f12 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -339,7 +339,7 @@ sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
                    enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
                    bool pot_buffers)
 {
-   assert(screen);
+   assert(pipe);
    assert(width && height);
 
    switch (u_reduce_video_profile(profile)) {
-- 
cgit v1.2.3


From e57f7b7b107c610fa2d7f149f3441c2b4a9600cb Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 7 Nov 2009 19:46:33 -0500
Subject: g3dvl: Use ureg to build shaders.

---
 src/gallium/auxiliary/vl/Makefile                |   3 +-
 src/gallium/auxiliary/vl/SConscript              |   1 -
 src/gallium/auxiliary/vl/vl_compositor.c         | 187 ++-----
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 660 +++++++----------------
 src/gallium/auxiliary/vl/vl_shader_build.c       | 242 ---------
 src/gallium/auxiliary/vl/vl_shader_build.h       |  88 ---
 6 files changed, 246 insertions(+), 935 deletions(-)
 delete mode 100644 src/gallium/auxiliary/vl/vl_shader_build.c
 delete mode 100644 src/gallium/auxiliary/vl/vl_shader_build.h

diff --git a/src/gallium/auxiliary/vl/Makefile b/src/gallium/auxiliary/vl/Makefile
index 4314c1e8d69..b4b6fb5bdac 100644
--- a/src/gallium/auxiliary/vl/Makefile
+++ b/src/gallium/auxiliary/vl/Makefile
@@ -7,7 +7,6 @@ C_SOURCES = \
 	vl_bitstream_parser.c \
 	vl_mpeg12_mc_renderer.c \
 	vl_compositor.c \
-        vl_csc.c \
-	vl_shader_build.c
+        vl_csc.c
 
 include ../../Makefile.template
diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript
index aed69f5efed..4b1ef90b9bb 100644
--- a/src/gallium/auxiliary/vl/SConscript
+++ b/src/gallium/auxiliary/vl/SConscript
@@ -7,7 +7,6 @@ vl = env.ConvenienceLibrary(
 		'vl_mpeg12_mc_renderer.c',
 		'vl_compositor.c',
                 'vl_csc.c',
-		'vl_shader_build.c',
 	])
 
 auxiliaries.insert(0, vl)
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index b36dbeb2088..810c7759596 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -29,11 +29,9 @@
 #include <assert.h>
 #include <pipe/p_context.h>
 #include <pipe/p_inlines.h>
-#include <tgsi/tgsi_parse.h>
-#include <tgsi/tgsi_build.h>
+#include <tgsi/tgsi_ureg.h>
 #include <util/u_memory.h>
 #include "vl_csc.h"
-#include "vl_shader_build.h"
 
 struct vertex2f
 {
@@ -76,156 +74,81 @@ static const struct vertex2f surface_verts[4] =
  */
 static const struct vertex2f *surface_texcoords = surface_verts;
 
-static void
+static bool
 create_vert_shader(struct vl_compositor *c)
 {
-   const unsigned max_tokens = 50;
-
-   struct pipe_shader_state vs;
-   struct tgsi_token *tokens;
-   struct tgsi_header *header;
-
-   struct tgsi_full_declaration decl;
-   struct tgsi_full_instruction inst;
-
-   unsigned ti;
-
-   unsigned i;
-
-   assert(c);
-
-   tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token));
-   *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-   header = (struct tgsi_header*)&tokens[1];
-   *header = tgsi_build_header();
-   *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-   ti = 3;
-
-   /*
-    * decl i0             ; Vertex pos
-    * decl i1             ; Vertex texcoords
-    */
-   for (i = 0; i < 2; i++) {
-      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /*
-    * decl c0             ; Scaling vector to scale vertex pos rect to destination size
-    * decl c1             ; Translation vector to move vertex pos rect into position
-    * decl c2             ; Scaling vector to scale texcoord rect to source size
-    * decl c3             ; Translation vector to move texcoord rect into position
-    */
-   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /*
-    * decl o0             ; Vertex pos
-    * decl o1             ; Vertex texcoords
-    */
-   for (i = 0; i < 2; i++) {
-      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
+   struct ureg_program *shader;
+   struct ureg_src vpos, vtex;
+   struct ureg_src vpos_scale, vpos_trans, vtex_scale, vtex_trans;
+   struct ureg_dst o_vpos, o_vtex;
+   
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return false;
 
-   /* decl t0, t1 */
-   decl = vl_decl_temps(0, 1);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   vpos = ureg_DECL_vs_input(shader, 0);
+   vtex = ureg_DECL_vs_input(shader, 1);
+   vpos_scale = ureg_DECL_constant(shader, 0);
+   vpos_trans = ureg_DECL_constant(shader, 1);
+   vtex_scale = ureg_DECL_constant(shader, 2);
+   vtex_trans = ureg_DECL_constant(shader, 3);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1);
 
    /*
-    * mad o0, i0, c0, c1  ; Scale and translate unit output rect to destination size and pos
-    * mad o1, i1, c2, c3  ; Scale and translate unit texcoord rect to source size and pos
+    * o_vpos = vpos * vpos_scale + vpos_trans
+    * o_vtex = vtex * vtex_scale + vtex_trans
     */
-   for (i = 0; i < 2; ++i) {
-      inst = vl_inst4(TGSI_OPCODE_MAD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1);
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-   }
+   ureg_MAD(shader, o_vpos, vpos, vpos_scale, vpos_trans);
+   ureg_MAD(shader, o_vtex, vtex, vtex_scale, vtex_trans);
 
-   /* end */
-   inst = vl_end();
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   ureg_END(shader);
 
-   assert(ti <= max_tokens);
+   c->vertex_shader = ureg_create_shader_and_destroy(shader, c->pipe);
+   if (!c->vertex_shader)
+      return false;
 
-   vs.tokens = tokens;
-   c->vertex_shader = c->pipe->create_vs_state(c->pipe, &vs);
-   FREE(tokens);
+   return true;
 }
 
-static void
+static bool
 create_frag_shader(struct vl_compositor *c)
 {
-   const unsigned max_tokens = 50;
-
-   struct pipe_shader_state fs;
-   struct tgsi_token *tokens;
-   struct tgsi_header *header;
-
-   struct tgsi_full_declaration decl;
-   struct tgsi_full_instruction inst;
-
-   unsigned ti;
-
+   struct ureg_program *shader;
+   struct ureg_src tc;
+   struct ureg_src csc[4];
+   struct ureg_src sampler;
+   struct ureg_dst texel;
+   struct ureg_dst fragment;
    unsigned i;
+   
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return false;
 
-   assert(c);
-
-   tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token));
-   *(struct tgsi_version*)&tokens[0] = tgsi_build_version();
-   header = (struct tgsi_header*)&tokens[1];
-   *header = tgsi_build_header();
-   *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-   ti = 3;
-
-   /* decl i0             ; Texcoords for s0 */
-   decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
+   for (i = 0; i < 4; ++i)
+      csc[i] = ureg_DECL_constant(shader, i);
+   sampler = ureg_DECL_sampler(shader, 0);
+   texel = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * decl c0-c3          ; CSC matrix c0-c3
+    * texel = tex(tc, sampler)
+    * fragment = csc * texel
     */
-   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl o0             ; Fragment color */
-   decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl t0 */
-   decl = vl_decl_temps(0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
+   for (i = 0; i < 4; ++i)
+      ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
 
-   /* decl s0             ; Sampler for tex containing picture to display */
-   decl = vl_decl_samplers(0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   ureg_release_temporary(shader, texel);
+   ureg_END(shader);
 
-   /* tex2d t0, i0, s0    ; Read src pixel */
-   inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0);
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-   /*
-    * dp4 o0.x, t0, c0    ; Multiply pixel by the color conversion matrix
-    * dp4 o0.y, t0, c1
-    * dp4 o0.z, t0, c2
-    * dp4 o0.w, t0, c3
-    */
-   for (i = 0; i < 4; ++i) {
-      inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i);
-      inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /* end */
-   inst = vl_end();
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-	
-   assert(ti <= max_tokens);
+   c->fragment_shader = ureg_create_shader_and_destroy(shader, c->pipe);
+   if (!c->fragment_shader)
+      return false;
 
-   fs.tokens = tokens;
-   c->fragment_shader = c->pipe->create_fs_state(c->pipe, &fs);
-   FREE(tokens);
+   return true;
 }
 
 static bool
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index bc4ab5fb357..adefafd3e9e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -31,9 +31,7 @@
 #include <pipe/p_inlines.h>
 #include <util/u_math.h>
 #include <util/u_memory.h>
-#include <tgsi/tgsi_parse.h>
-#include <tgsi/tgsi_build.h>
-#include "vl_shader_build.h"
+#include <tgsi/tgsi_ureg.h>
 
 #define DEFAULT_BUF_ALIGNMENT 1
 #define MACROBLOCK_WIDTH 16
@@ -96,244 +94,126 @@ enum MACROBLOCK_TYPE
    NUM_MACROBLOCK_TYPES
 };
 
-static void
+static bool
 create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
-   const unsigned max_tokens = 50;
-
-   struct pipe_shader_state vs;
-   struct tgsi_token *tokens;
-   struct tgsi_header *header;
-
-   struct tgsi_full_declaration decl;
-   struct tgsi_full_instruction inst;
-
-   unsigned ti;
-
+   struct ureg_program *shader;
+   struct ureg_src vpos, vtex[3];
+   struct ureg_dst o_vpos, o_vtex[3];
    unsigned i;
+   
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return false;
 
-   assert(r);
-
-   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
-   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
-   header = (struct tgsi_header *) &tokens[1];
-   *header = tgsi_build_header();
-   *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-   ti = 3;
-
-   /*
-    * decl i0              ; Vertex pos
-    * decl i1              ; Luma texcoords
-    * decl i2              ; Chroma Cb texcoords
-    * decl i3              ; Chroma Cr texcoords
-    */
-   for (i = 0; i < 4; i++) {
-      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /*
-    * decl o0              ; Vertex pos
-    * decl o1              ; Luma texcoords
-    * decl o2              ; Chroma Cb texcoords
-    * decl o3              ; Chroma Cr texcoords
-    */
-   for (i = 0; i < 4; i++) {
-      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
+   vpos = ureg_DECL_vs_input(shader, 0);
+   for (i = 0; i < 3; ++i)
+      vtex[i] = ureg_DECL_vs_input(shader, i + 1);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   for (i = 0; i < 3; ++i)
+      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
 
    /*
-    * mov o0, i0           ; Move input vertex pos to output
-    * mov o1, i1           ; Move input luma texcoords to output
-    * mov o2, i2           ; Move input chroma Cb texcoords to output
-    * mov o3, i3           ; Move input chroma Cr texcoords to output
+    * o_vpos = vpos
+    * o_vtex[0..2] = vtex[0..2]
     */
-   for (i = 0; i < 4; ++i) {
-      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-   }
+   ureg_MOV(shader, o_vpos, vpos);
+   for (i = 0; i < 3; ++i)
+      ureg_MOV(shader, o_vtex[i], vtex[i]);
 
-   /* end */
-   inst = vl_end();
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   ureg_END(shader);
 
-   assert(ti <= max_tokens);
+   r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
+   if (!r->i_vs)
+      return false;
 
-   vs.tokens = tokens;
-   r->i_vs = r->pipe->create_vs_state(r->pipe, &vs);
-   free(tokens);
+   return true;
 }
 
-static void
+static bool
 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
-   const unsigned max_tokens = 100;
-
-   struct pipe_shader_state fs;
-   struct tgsi_token *tokens;
-   struct tgsi_header *header;
-
-   struct tgsi_full_declaration decl;
-   struct tgsi_full_instruction inst;
-
-   unsigned ti;
-
+   struct ureg_program *shader;
+   struct ureg_src scale;
+   struct ureg_src tc[3];
+   struct ureg_src sampler[3];
+   struct ureg_dst texel, temp;
+   struct ureg_dst fragment;
    unsigned i;
+   
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return false;
 
-   assert(r);
-
-   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
-   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
-   header = (struct tgsi_header *) &tokens[1];
-   *header = tgsi_build_header();
-   *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-   ti = 3;
-
-   /*
-    * decl i0                      ; Luma texcoords
-    * decl i1                      ; Chroma Cb texcoords
-    * decl i2                      ; Chroma Cr texcoords
-    */
-   for (i = 0; i < 3; ++i) {
-      decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /* decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
-   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl o0                      ; Fragment color */
-   decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl t0, t1 */
-   decl = vl_decl_temps(0, 1);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /*
-    * decl s0                      ; Sampler for luma texture
-    * decl s1                      ; Sampler for chroma Cb texture
-    * decl s2                      ; Sampler for chroma Cr texture
-    */
-   for (i = 0; i < 3; ++i) {
-      decl = vl_decl_samplers(i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   scale = ureg_DECL_constant(shader, 0);
+   for (i = 0; i < 3; ++i)  {
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+      sampler[i] = ureg_DECL_sampler(shader, i);
    }
+   texel = ureg_DECL_temporary(shader);
+   temp = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * tex2d t1, i0, s0             ; Read texel from luma texture
-    * mov t0.x, t1.x               ; Move luma sample into .x component
-    * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
-    * mov t0.y, t1.x               ; Move Cb sample into .y component
-    * tex2d t1, i2, s2             ; Read texel from chroma Cr texture
-    * mov t0.z, t1.x               ; Move Cr sample into .z component
+    * texel.r = tex(tc[0], sampler[0])
+    * texel.g = tex(tc[1], sampler[1])
+    * texel.b = tex(tc[2], sampler[2])
+    * fragment = texel * scale
     */
    for (i = 0; i < 3; ++i) {
-      inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-      inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+      /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
+      ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
+      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
    }
+   ureg_MUL(shader, fragment, ureg_src(texel), scale);
 
-   /* mul o0, t0, c0               ; Rescale texel to correct range */
-   inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-   /* end */
-   inst = vl_end();
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   ureg_release_temporary(shader, texel);
+   ureg_release_temporary(shader, temp);
+   ureg_END(shader);
 
-   assert(ti <= max_tokens);
+   r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
+   if (!r->i_fs)
+      return false;
 
-   fs.tokens = tokens;
-   r->i_fs = r->pipe->create_fs_state(r->pipe, &fs);
-   free(tokens);
+   return true;
 }
 
-static void
+static bool
 create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
-   const unsigned max_tokens = 100;
-
-   struct pipe_shader_state vs;
-   struct tgsi_token *tokens;
-   struct tgsi_header *header;
-
-   struct tgsi_full_declaration decl;
-   struct tgsi_full_instruction inst;
-
-   unsigned ti;
-
+   struct ureg_program *shader;
+   struct ureg_src vpos, vtex[4];
+   struct ureg_dst o_vpos, o_vtex[4];
    unsigned i;
+   
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return false;
 
-   assert(r);
-
-   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
-   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
-   header = (struct tgsi_header *) &tokens[1];
-   *header = tgsi_build_header();
-   *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-   ti = 3;
-
-   /*
-    * decl i0              ; Vertex pos
-    * decl i1              ; Luma texcoords
-    * decl i2              ; Chroma Cb texcoords
-    * decl i3              ; Chroma Cr texcoords
-    * decl i4              ; Ref surface top field texcoords
-    * decl i5              ; Ref surface bottom field texcoords (unused, packed in the same stream)
-    */
-   for (i = 0; i < 6; i++) {
-      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /*
-    * decl o0              ; Vertex pos
-    * decl o1              ; Luma texcoords
-    * decl o2              ; Chroma Cb texcoords
-    * decl o3              ; Chroma Cr texcoords
-    * decl o4              ; Ref macroblock texcoords
-    */
-   for (i = 0; i < 5; i++) {
-      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
+   vpos = ureg_DECL_vs_input(shader, 0);
+   for (i = 0; i < 4; ++i)
+      vtex[i] = ureg_DECL_vs_input(shader, i + 1);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   for (i = 0; i < 4; ++i)
+      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
 
    /*
-    * mov o0, i0           ; Move input vertex pos to output
-    * mov o1, i1           ; Move input luma texcoords to output
-    * mov o2, i2           ; Move input chroma Cb texcoords to output
-    * mov o3, i3           ; Move input chroma Cr texcoords to output
+    * o_vpos = vpos
+    * o_vtex[0..2] = vtex[0..2]
+    * o_vtex[3] = vpos + vtex[3] // Apply motion vector
     */
-   for (i = 0; i < 4; ++i) {
-        inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
-        ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /* add o4, i0, i4       ; Translate vertex pos by motion vec to form ref macroblock texcoords */
-   inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4);
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   ureg_MOV(shader, o_vpos, vpos);
+   for (i = 0; i < 3; ++i)
+      ureg_MOV(shader, o_vtex[i], vtex[i]);
+   ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
 
-   /* end */
-   inst = vl_end();
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   ureg_END(shader);
 
-   assert(ti <= max_tokens);
+   r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
+   if (!r->p_vs[0])
+      return false;
 
-   vs.tokens = tokens;
-   r->p_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
-   free(tokens);
+   return true;
 }
 
 static void
@@ -342,107 +222,54 @@ create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
    assert(false);
 }
 
-static void
+static bool
 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
-   const unsigned max_tokens = 100;
-
-   struct pipe_shader_state fs;
-   struct tgsi_token *tokens;
-   struct tgsi_header *header;
-
-   struct tgsi_full_declaration decl;
-   struct tgsi_full_instruction inst;
-
-   unsigned ti;
-
+   struct ureg_program *shader;
+   struct ureg_src scale;
+   struct ureg_src tc[4];
+   struct ureg_src sampler[4];
+   struct ureg_dst texel, ref;
+   struct ureg_dst fragment;
    unsigned i;
+   
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return false;
 
-   assert(r);
-
-   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
-   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
-   header = (struct tgsi_header *) &tokens[1];
-   *header = tgsi_build_header();
-   *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-   ti = 3;
-
-   /*
-    * decl i0                      ; Luma texcoords
-    * decl i1                      ; Chroma Cb texcoords
-    * decl i2                      ; Chroma Cr texcoords
-    * decl i3                      ; Ref macroblock texcoords
-    */
-   for (i = 0; i < 4; ++i) {
-      decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /* decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */
-   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl o0                      ; Fragment color */
-   decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl t0, t1 */
-   decl = vl_decl_temps(0, 1);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /*
-    * decl s0                      ; Sampler for luma texture
-    * decl s1                      ; Sampler for chroma Cb texture
-    * decl s2                      ; Sampler for chroma Cr texture
-    * decl s3                      ; Sampler for ref surface texture
-    */
-   for (i = 0; i < 4; ++i) {
-      decl = vl_decl_samplers(i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   scale = ureg_DECL_constant(shader, 0);
+   for (i = 0; i < 4; ++i)  {
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+      sampler[i] = ureg_DECL_sampler(shader, i);
    }
+   texel = ureg_DECL_temporary(shader);
+   ref = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * tex2d t1, i0, s0             ; Read texel from luma texture
-    * mov t0.x, t1.x               ; Move luma sample into .x component
-    * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
-    * mov t0.y, t1.x               ; Move Cb sample into .y component
-    * tex2d t1, i2, s2             ; Read texel from chroma Cr texture
-    * mov t0.z, t1.x               ; Move Cr sample into .z component
+    * texel.r = tex(tc[0], sampler[0])
+    * texel.g = tex(tc[1], sampler[1])
+    * texel.b = tex(tc[2], sampler[2])
+    * ref = tex(tc[3], sampler[3])
+    * fragment = texel * scale + ref
     */
    for (i = 0; i < 3; ++i) {
-      inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-      inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+      /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
+      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
+      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
    }
+   ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
+   ureg_MAD(shader, fragment, ureg_src(texel), scale, ureg_src(ref));
 
-   /* mul t0, t0, c0               ; Rescale texel to correct range */
-   inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-   /* tex2d t1, i3, s3             ; Read texel from ref macroblock */
-   inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3);
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   ureg_release_temporary(shader, texel);
+   ureg_release_temporary(shader, ref);
+   ureg_END(shader);
 
-   /* add o0, t0, t1               ; Add ref and differential to form final output */
-   inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-   /* end */
-   inst = vl_end();
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-   assert(ti <= max_tokens);
+   r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
+   if (!r->p_fs[0])
+      return false;
 
-   fs.tokens = tokens;
-   r->p_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
-   free(tokens);
+   return true;
 }
 
 static void
@@ -451,89 +278,45 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    assert(false);
 }
 
-static void
+static bool
 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
-   const unsigned max_tokens = 100;
-
-   struct pipe_shader_state vs;
-   struct tgsi_token *tokens;
-   struct tgsi_header *header;
-
-   struct tgsi_full_declaration decl;
-   struct tgsi_full_instruction inst;
-
-   unsigned ti;
-
+   struct ureg_program *shader;
+   struct ureg_src vpos, vtex[5];
+   struct ureg_dst o_vpos, o_vtex[5];
    unsigned i;
+   
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return false;
 
-   assert(r);
-
-   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
-   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
-   header = (struct tgsi_header *) &tokens[1];
-   *header = tgsi_build_header();
-   *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
-
-   ti = 3;
-
-   /*
-    * decl i0              ; Vertex pos
-    * decl i1              ; Luma texcoords
-    * decl i2              ; Chroma Cb texcoords
-    * decl i3              ; Chroma Cr texcoords
-    * decl i4              ; First ref macroblock top field texcoords
-    * decl i5              ; First ref macroblock bottom field texcoords (unused, packed in the same stream)
-    * decl i6              ; Second ref macroblock top field texcoords
-    * decl i7              ; Second ref macroblock bottom field texcoords (unused, packed in the same stream)
-    */
-   for (i = 0; i < 8; i++) {
-      decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /*
-    * decl o0              ; Vertex pos
-    * decl o1              ; Luma texcoords
-    * decl o2              ; Chroma Cb texcoords
-    * decl o3              ; Chroma Cr texcoords
-    * decl o4              ; First ref macroblock texcoords
-    * decl o5              ; Second ref macroblock texcoords
-    */
-   for (i = 0; i < 6; i++) {
-      decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /*
-    * mov o0, i0           ; Move input vertex pos to output
-    * mov o1, i1           ; Move input luma texcoords to output
-    * mov o2, i2           ; Move input chroma Cb texcoords to output
-    * mov o3, i3           ; Move input chroma Cr texcoords to output
-    */
-   for (i = 0; i < 4; ++i) {
-      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i);
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-   }
+   vpos = ureg_DECL_vs_input(shader, 0);
+   for (i = 0; i < 4; ++i)
+      vtex[i] = ureg_DECL_vs_input(shader, i + 1);
+   /* Skip input 5 */
+   vtex[4] = ureg_DECL_vs_input(shader, 6);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   for (i = 0; i < 5; ++i)
+      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
 
    /*
-    * add o4, i0, i4       ; Translate vertex pos by motion vec to form first ref macroblock texcoords
-    * add o5, i0, i6       ; Translate vertex pos by motion vec to form second ref macroblock texcoords
+    * o_vpos = vpos
+    * o_vtex[0..2] = vtex[0..2]
+    * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
     */
-   for (i = 0; i < 2; ++i) {
-      inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2);
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-   }
+   ureg_MOV(shader, o_vpos, vpos);
+   for (i = 0; i < 3; ++i)
+      ureg_MOV(shader, o_vtex[i], vtex[i]);
+   for (i = 3; i < 5; ++i)
+      ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
 
-   /* end */
-   inst = vl_end();
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   ureg_END(shader);
 
-   assert(ti <= max_tokens);
+   r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
+   if (!r->b_vs[0])
+      return false;
 
-   vs.tokens = tokens;
-   r->b_vs[0] = r->pipe->create_vs_state(r->pipe, &vs);
-   free(tokens);
+   return true;
 }
 
 static void
@@ -542,125 +325,62 @@ create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
    assert(false);
 }
 
-static void
+static bool
 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
-   const unsigned max_tokens = 100;
-
-   struct pipe_shader_state fs;
-   struct tgsi_token *tokens;
-   struct tgsi_header *header;
-
-   struct tgsi_full_declaration decl;
-   struct tgsi_full_instruction inst;
-
-   unsigned ti;
-
+   struct ureg_program *shader;
+   struct ureg_src scale, blend;
+   struct ureg_src tc[5];
+   struct ureg_src sampler[5];
+   struct ureg_dst texel, ref[2];
+   struct ureg_dst fragment;
    unsigned i;
+   
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return false;
 
-   assert(r);
-
-   tokens = (struct tgsi_token *) malloc(max_tokens * sizeof(struct tgsi_token));
-   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
-   header = (struct tgsi_header *) &tokens[1];
-   *header = tgsi_build_header();
-   *(struct tgsi_processor *) &tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
-
-   ti = 3;
-
-   /*
-    * decl i0                      ; Luma texcoords
-    * decl i1                      ; Chroma Cb texcoords
-    * decl i2                      ; Chroma Cr texcoords
-    * decl i3                      ; First ref macroblock texcoords
-    * decl i4                      ; Second ref macroblock texcoords
-    */
-   for (i = 0; i < 5; ++i) {
-      decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-   }
-
-   /*
-    * decl c0                      ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
-    * decl c1                      ; Constant 1/2 in .x channel to use as weight to blend past and future texels
-    */
-   decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl o0                      ; Fragment color */
-   decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /* decl t0-t2 */
-   decl = vl_decl_temps(0, 2);
-   ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
-
-   /*
-    * decl s0                      ; Sampler for luma texture
-    * decl s1                      ; Sampler for chroma Cb texture
-    * decl s2                      ; Sampler for chroma Cr texture
-    * decl s3                      ; Sampler for first ref surface texture
-    * decl s4                      ; Sampler for second ref surface texture
-    */
-   for (i = 0; i < 5; ++i) {
-      decl = vl_decl_samplers(i, i);
-      ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+   scale = ureg_DECL_constant(shader, 0);
+   blend = ureg_DECL_constant(shader, 1);
+   for (i = 0; i < 5; ++i)  {
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+      sampler[i] = ureg_DECL_sampler(shader, i);
    }
+   texel = ureg_DECL_temporary(shader);
+   ref[0] = ureg_DECL_temporary(shader);
+   ref[1] = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * tex2d t1, i0, s0             ; Read texel from luma texture
-    * mov t0.x, t1.x               ; Move luma sample into .x component
-    * tex2d t1, i1, s1             ; Read texel from chroma Cb texture
-    * mov t0.y, t1.x               ; Move Cb sample into .y component
-    * tex2d t1, i2, s2             ; Read texel from chroma Cr texture
-    * mov t0.z, t1.x               ; Move Cr sample into .z component
+    * texel.r = tex(tc[0], sampler[0])
+    * texel.g = tex(tc[1], sampler[1])
+    * texel.b = tex(tc[2], sampler[2])
+    * ref[0..1] = tex(tc[3..4], sampler[3..4])
+    * ref[0] = lerp(ref[0], ref[1], 0.5)
+    * fragment = texel * scale + ref[0]
     */
    for (i = 0; i < 3; ++i) {
-      inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i);
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-      inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-      inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+      /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
+      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
+      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
    }
+   ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
+   ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
+   ureg_LRP(shader, ref[0], ureg_swizzle(blend, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X),
+            ureg_src(ref[0]), ureg_src(ref[1]));
 
-   /* mul t0, t0, c0               ; Rescale texel to correct range */
-   inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0);
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-   /*
-    * tex2d t1, i3, s3             ; Read texel from first ref macroblock
-    * tex2d t2, i4, s4             ; Read texel from second ref macroblock
-    */
-   for (i = 0; i < 2; ++i) {
-      inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
-      ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-   }
+   ureg_MAD(shader, fragment, ureg_src(texel), scale, ureg_src(ref[0]));
 
-   /* lerp t1, c1.x, t1, t2        ; Blend past and future texels */
-   inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
-   inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-   inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-   inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-   inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+   ureg_release_temporary(shader, texel);
+   ureg_release_temporary(shader, ref[0]);
+   ureg_release_temporary(shader, ref[1]);
+   ureg_END(shader);
 
-   /* add o0, t0, t1               ; Add past/future ref and differential to form final output */
-   inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-   /* end */
-   inst = vl_end();
-   ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
-
-   assert(ti <= max_tokens);
+   r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
+   if (!r->b_fs[0])
+      return false;
 
-   fs.tokens = tokens;
-   r->b_fs[0] = r->pipe->create_fs_state(r->pipe, &fs);
-   free(tokens);
+   return true;
 }
 
 static void
diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c
deleted file mode 100644
index faa20a903cd..00000000000
--- a/src/gallium/auxiliary/vl/vl_shader_build.c
+++ /dev/null
@@ -1,242 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "vl_shader_build.h"
-#include <assert.h>
-#include <tgsi/tgsi_parse.h>
-#include <tgsi/tgsi_build.h>
-
-struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
-{
-   struct tgsi_full_declaration decl = tgsi_default_full_declaration();
-
-   decl.Declaration.File = TGSI_FILE_INPUT;
-   decl.Declaration.Semantic = 1;
-   decl.Semantic.SemanticName = name;
-   decl.Semantic.SemanticIndex = index;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
-
-   return decl;
-}
-
-struct tgsi_full_declaration vl_decl_interpolated_input
-(
-   unsigned int name,
-   unsigned int index,
-   unsigned int first,
-   unsigned int last,
-   int interpolation
-)
-{
-   struct tgsi_full_declaration decl = tgsi_default_full_declaration();
-
-   assert
-   (
-      interpolation == TGSI_INTERPOLATE_CONSTANT ||
-      interpolation == TGSI_INTERPOLATE_LINEAR ||
-      interpolation == TGSI_INTERPOLATE_PERSPECTIVE
-   );
-
-   decl.Declaration.File = TGSI_FILE_INPUT;
-   decl.Declaration.Semantic = 1;
-   decl.Semantic.SemanticName = name;
-   decl.Semantic.SemanticIndex = index;
-   decl.Declaration.Interpolate = interpolation;;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
-
-   return decl;
-}
-
-struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
-{
-   struct tgsi_full_declaration decl = tgsi_default_full_declaration();
-
-   decl.Declaration.File = TGSI_FILE_CONSTANT;
-   decl.Declaration.Semantic = 1;
-   decl.Semantic.SemanticName = name;
-   decl.Semantic.SemanticIndex = index;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
-
-   return decl;
-}
-
-struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
-{
-   struct tgsi_full_declaration decl = tgsi_default_full_declaration();
-
-   decl.Declaration.File = TGSI_FILE_OUTPUT;
-   decl.Declaration.Semantic = 1;
-   decl.Semantic.SemanticName = name;
-   decl.Semantic.SemanticIndex = index;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
-
-   return decl;
-}
-
-struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last)
-{
-   struct tgsi_full_declaration decl = tgsi_default_full_declaration();
-
-   decl = tgsi_default_full_declaration();
-   decl.Declaration.File = TGSI_FILE_TEMPORARY;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
-
-   return decl;
-}
-
-struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last)
-{
-   struct tgsi_full_declaration decl = tgsi_default_full_declaration();
-
-   decl = tgsi_default_full_declaration();
-   decl.Declaration.File = TGSI_FILE_SAMPLER;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
-
-   return decl;
-}
-
-struct tgsi_full_instruction vl_inst2
-(
-   int opcode,
-   enum tgsi_file_type dst_file,
-   unsigned int dst_index,
-   enum tgsi_file_type src_file,
-   unsigned int src_index
-)
-{
-   struct tgsi_full_instruction inst = tgsi_default_full_instruction();
-
-   inst.Instruction.Opcode = opcode;
-   inst.Instruction.NumDstRegs = 1;
-   inst.FullDstRegisters[0].DstRegister.File = dst_file;
-   inst.FullDstRegisters[0].DstRegister.Index = dst_index;
-   inst.Instruction.NumSrcRegs = 1;
-   inst.FullSrcRegisters[0].SrcRegister.File = src_file;
-   inst.FullSrcRegisters[0].SrcRegister.Index = src_index;
-
-   return inst;
-}
-
-struct tgsi_full_instruction vl_inst3
-(
-   int opcode,
-   enum tgsi_file_type dst_file,
-   unsigned int dst_index,
-   enum tgsi_file_type src1_file,
-   unsigned int src1_index,
-   enum tgsi_file_type src2_file,
-   unsigned int src2_index
-)
-{
-   struct tgsi_full_instruction inst = tgsi_default_full_instruction();
-
-   inst.Instruction.Opcode = opcode;
-   inst.Instruction.NumDstRegs = 1;
-   inst.FullDstRegisters[0].DstRegister.File = dst_file;
-   inst.FullDstRegisters[0].DstRegister.Index = dst_index;
-   inst.Instruction.NumSrcRegs = 2;
-   inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
-   inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
-   inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
-   inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
-
-   return inst;
-}
-
-struct tgsi_full_instruction vl_tex
-(
-   int tex,
-   enum tgsi_file_type dst_file,
-   unsigned int dst_index,
-   enum tgsi_file_type src1_file,
-   unsigned int src1_index,
-   enum tgsi_file_type src2_file,
-   unsigned int src2_index
-)
-{
-   struct tgsi_full_instruction inst = tgsi_default_full_instruction();
-
-   inst.Instruction.Opcode = TGSI_OPCODE_TEX;
-   inst.Instruction.NumDstRegs = 1;
-   inst.FullDstRegisters[0].DstRegister.File = dst_file;
-   inst.FullDstRegisters[0].DstRegister.Index = dst_index;
-   inst.Instruction.NumSrcRegs = 2;
-   inst.InstructionExtTexture.Texture = tex;
-   inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
-   inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
-   inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
-   inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
-
-   return inst;
-}
-
-struct tgsi_full_instruction vl_inst4
-(
-   int opcode,
-   enum tgsi_file_type dst_file,
-   unsigned int dst_index,
-   enum tgsi_file_type src1_file,
-   unsigned int src1_index,
-   enum tgsi_file_type src2_file,
-   unsigned int src2_index,
-   enum tgsi_file_type src3_file,
-   unsigned int src3_index
-)
-{
-   struct tgsi_full_instruction inst = tgsi_default_full_instruction();
-
-   inst.Instruction.Opcode = opcode;
-   inst.Instruction.NumDstRegs = 1;
-   inst.FullDstRegisters[0].DstRegister.File = dst_file;
-   inst.FullDstRegisters[0].DstRegister.Index = dst_index;
-   inst.Instruction.NumSrcRegs = 3;
-   inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
-   inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
-   inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
-   inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
-   inst.FullSrcRegisters[2].SrcRegister.File = src3_file;
-   inst.FullSrcRegisters[2].SrcRegister.Index = src3_index;
-
-   return inst;
-}
-
-struct tgsi_full_instruction vl_end(void)
-{
-   struct tgsi_full_instruction inst = tgsi_default_full_instruction();
-
-   inst.Instruction.Opcode = TGSI_OPCODE_END;
-   inst.Instruction.NumDstRegs = 0;
-   inst.Instruction.NumSrcRegs = 0;
-
-   return inst;
-}
diff --git a/src/gallium/auxiliary/vl/vl_shader_build.h b/src/gallium/auxiliary/vl/vl_shader_build.h
deleted file mode 100644
index 5da71f8e136..00000000000
--- a/src/gallium/auxiliary/vl/vl_shader_build.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef vl_shader_build_h
-#define vl_shader_build_h
-
-#include <pipe/p_shader_tokens.h>
-
-struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last);
-struct tgsi_full_declaration vl_decl_interpolated_input
-(
-   unsigned int name,
-   unsigned int index,
-   unsigned int first,
-   unsigned int last,
-   int interpolation
-);
-struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last);
-struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last);
-struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last);
-struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last);
-struct tgsi_full_instruction vl_inst2
-(
-   int opcode,
-   enum tgsi_file_type dst_file,
-   unsigned int dst_index,
-   enum tgsi_file_type src_file,
-   unsigned int src_index
-);
-struct tgsi_full_instruction vl_inst3
-(
-   int opcode,
-   enum tgsi_file_type dst_file,
-   unsigned int dst_index,
-   enum tgsi_file_type src1_file,
-   unsigned int src1_index,
-   enum tgsi_file_type src2_file,
-   unsigned int src2_index
-);
-struct tgsi_full_instruction vl_tex
-(
-   int tex,
-   enum tgsi_file_type dst_file,
-   unsigned int dst_index,
-   enum tgsi_file_type src1_file,
-   unsigned int src1_index,
-   enum tgsi_file_type src2_file,
-   unsigned int src2_index
-);
-struct tgsi_full_instruction vl_inst4
-(
-   int opcode,
-   enum tgsi_file_type dst_file,
-   unsigned int dst_index,
-   enum tgsi_file_type src1_file,
-   unsigned int src1_index,
-   enum tgsi_file_type src2_file,
-   unsigned int src2_index,
-   enum tgsi_file_type src3_file,
-   unsigned int src3_index
-);
-struct tgsi_full_instruction vl_end(void);
-
-#endif
-- 
cgit v1.2.3


From c756cb8463711ec7ac098bebdfb1d2aa15f40e5c Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 7 Nov 2009 20:20:09 -0500
Subject: g3dvl: Use immediates in shaders where possible.

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 46 +++---------------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  1 -
 2 files changed, 5 insertions(+), 42 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index adefafd3e9e..04afa956444 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -40,6 +40,7 @@
 #define BLOCK_HEIGHT 8
 #define ZERO_BLOCK_NIL -1.0f
 #define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
+#define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
 
 struct vertex2f
 {
@@ -62,17 +63,6 @@ struct fragment_shader_consts
    struct vertex4f div;
 };
 
-/*
- * Muliplier renormalizes block samples from 16 bits to 12 bits.
- * Divider is used when calculating Y % 2 for choosing top or bottom
- * field for P or B macroblocks.
- * TODO: Use immediates.
- */
-static const struct fragment_shader_consts fs_consts = {
-   {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f},
-   {0.5f, 2.0f, 0.0f, 0.0f}
-};
-
 struct vert_stream_0
 {
    struct vertex2f pos;
@@ -134,7 +124,6 @@ static bool
 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src scale;
    struct ureg_src tc[3];
    struct ureg_src sampler[3];
    struct ureg_dst texel, temp;
@@ -145,7 +134,6 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
    if (!shader)
       return false;
 
-   scale = ureg_DECL_constant(shader, 0);
    for (i = 0; i < 3; ++i)  {
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
       sampler[i] = ureg_DECL_sampler(shader, i);
@@ -165,7 +153,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
    }
-   ureg_MUL(shader, fragment, ureg_src(texel), scale);
+   ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
 
    ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, temp);
@@ -226,7 +214,6 @@ static bool
 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src scale;
    struct ureg_src tc[4];
    struct ureg_src sampler[4];
    struct ureg_dst texel, ref;
@@ -237,7 +224,6 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    if (!shader)
       return false;
 
-   scale = ureg_DECL_constant(shader, 0);
    for (i = 0; i < 4; ++i)  {
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
       sampler[i] = ureg_DECL_sampler(shader, i);
@@ -259,7 +245,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
    }
    ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
-   ureg_MAD(shader, fragment, ureg_src(texel), scale, ureg_src(ref));
+   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
 
    ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, ref);
@@ -329,7 +315,6 @@ static bool
 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src scale, blend;
    struct ureg_src tc[5];
    struct ureg_src sampler[5];
    struct ureg_dst texel, ref[2];
@@ -340,8 +325,6 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    if (!shader)
       return false;
 
-   scale = ureg_DECL_constant(shader, 0);
-   blend = ureg_DECL_constant(shader, 1);
    for (i = 0; i < 5; ++i)  {
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
       sampler[i] = ureg_DECL_sampler(shader, i);
@@ -366,10 +349,9 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    }
    ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
    ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
-   ureg_LRP(shader, ref[0], ureg_swizzle(blend, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X),
-            ureg_src(ref[0]), ureg_src(ref[1]));
+   ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
 
-   ureg_MAD(shader, fragment, ureg_src(texel), scale, ureg_src(ref[0]));
+   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
 
    ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, ref[0]);
@@ -658,21 +640,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       sizeof(struct vertex_shader_consts)
    );
 
-   r->fs_const_buf.buffer = pipe_buffer_create
-   (
-      r->pipe->screen,
-      DEFAULT_BUF_ALIGNMENT,
-      PIPE_BUFFER_USAGE_CONSTANT, sizeof(struct fragment_shader_consts)
-   );
-
-   memcpy
-   (
-      pipe_buffer_map(r->pipe->screen, r->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-      &fs_consts, sizeof(struct fragment_shader_consts)
-   );
-
-   pipe_buffer_unmap(r->pipe->screen, r->fs_const_buf.buffer);
-
    return true;
 }
 
@@ -684,7 +651,6 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
    assert(r);
 
    pipe_buffer_reference(&r->vs_const_buf.buffer, NULL);
-   pipe_buffer_reference(&r->fs_const_buf.buffer, NULL);
 
    for (i = 0; i < 3; ++i)
       pipe_buffer_reference(&r->vertex_bufs.all[i].buffer, NULL);
@@ -1004,8 +970,6 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
                                 &r->vs_const_buf);
-   r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_FRAGMENT, 0,
-                                &r->fs_const_buf);
 
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 5d2c1273ee3..85d8c5808d5 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -63,7 +63,6 @@ struct vl_mpeg12_mc_renderer
 
    struct pipe_viewport_state viewport;
    struct pipe_constant_buffer vs_const_buf;
-   struct pipe_constant_buffer fs_const_buf;
    struct pipe_framebuffer_state fb_state;
    struct pipe_vertex_element vertex_elems[8];
 	
-- 
cgit v1.2.3


From 8cdfa77b18b62687870824d998b7d5d21204d2ce Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Mon, 9 Nov 2009 16:55:37 -0500
Subject: g3dvl: Use a func instead of large ugly macro to gen per-block verts.

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 226 +++++++++++++----------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   9 +-
 2 files changed, 138 insertions(+), 97 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 04afa956444..cedda8b1d4a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include "vl_mpeg12_mc_renderer.h"
@@ -42,11 +42,6 @@
 #define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
 #define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
 
-struct vertex2f
-{
-   float x, y;
-};
-
 struct vertex4f
 {
    float x, y, z, w;
@@ -91,7 +86,7 @@ create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_src vpos, vtex[3];
    struct ureg_dst o_vpos, o_vtex[3];
    unsigned i;
-   
+
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
       return false;
@@ -129,7 +124,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_dst texel, temp;
    struct ureg_dst fragment;
    unsigned i;
-   
+
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return false;
@@ -173,7 +168,7 @@ create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_src vpos, vtex[4];
    struct ureg_dst o_vpos, o_vtex[4];
    unsigned i;
-   
+
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
       return false;
@@ -219,7 +214,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_dst texel, ref;
    struct ureg_dst fragment;
    unsigned i;
-   
+
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return false;
@@ -271,7 +266,7 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_src vpos, vtex[5];
    struct ureg_dst o_vpos, o_vtex[5];
    unsigned i;
-   
+
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
       return false;
@@ -320,7 +315,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_dst texel, ref[2];
    struct ureg_dst fragment;
    unsigned i;
-   
+
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return false;
@@ -686,73 +681,105 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
    return -1;
 }
 
-/* XXX: One of these days this will have to be killed with fire */
-#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, use_zb, zb)				\
-	do {															\
-	(vb)[0].pos.x = (mbx) * (unitx) + (ofsx);		(vb)[0].pos.y = (mby) * (unity) + (ofsy);			\
-	(vb)[1].pos.x = (mbx) * (unitx) + (ofsx);		(vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy);		\
-	(vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[2].pos.y = (mby) * (unity) + (ofsy);			\
-	(vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[3].pos.y = (mby) * (unity) + (ofsy);			\
-	(vb)[4].pos.x = (mbx) * (unitx) + (ofsx);		(vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy);		\
-	(vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy);		\
-																\
-	if (!use_zb || (cbp) & (lm))												\
-	{															\
-		(vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[0].luma_tc.y = (mby) * (unity) + (ofsy);		\
-		(vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
-		(vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[2].luma_tc.y = (mby) * (unity) + (ofsy);		\
-		(vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[3].luma_tc.y = (mby) * (unity) + (ofsy);		\
-		(vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
-		(vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
-	}															\
-	else															\
-	{															\
-		(vb)[0].luma_tc.x = (zb)[0].x;		(vb)[0].luma_tc.y = (zb)[0].y;						\
-		(vb)[1].luma_tc.x = (zb)[0].x;		(vb)[1].luma_tc.y = (zb)[0].y + (hy);					\
-		(vb)[2].luma_tc.x = (zb)[0].x + (hx);	(vb)[2].luma_tc.y = (zb)[0].y;						\
-		(vb)[3].luma_tc.x = (zb)[0].x + (hx);	(vb)[3].luma_tc.y = (zb)[0].y;						\
-		(vb)[4].luma_tc.x = (zb)[0].x;		(vb)[4].luma_tc.y = (zb)[0].y + (hy);					\
-		(vb)[5].luma_tc.x = (zb)[0].x + (hx);	(vb)[5].luma_tc.y = (zb)[0].y + (hy);					\
-	}															\
-																\
-	if (!use_zb || (cbp) & (cbm))												\
-	{															\
-		(vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[0].cb_tc.y = (mby) * (unity) + (ofsy);		\
-		(vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
-		(vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[2].cb_tc.y = (mby) * (unity) + (ofsy);		\
-		(vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[3].cb_tc.y = (mby) * (unity) + (ofsy);		\
-		(vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
-		(vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
-	}															\
-	else															\
-	{															\
-		(vb)[0].cb_tc.x = (zb)[1].x;		(vb)[0].cb_tc.y = (zb)[1].y;						\
-		(vb)[1].cb_tc.x = (zb)[1].x;		(vb)[1].cb_tc.y = (zb)[1].y + (hy);					\
-		(vb)[2].cb_tc.x = (zb)[1].x + (hx);	(vb)[2].cb_tc.y = (zb)[1].y;						\
-		(vb)[3].cb_tc.x = (zb)[1].x + (hx);	(vb)[3].cb_tc.y = (zb)[1].y;						\
-		(vb)[4].cb_tc.x = (zb)[1].x;		(vb)[4].cb_tc.y = (zb)[1].y + (hy);					\
-		(vb)[5].cb_tc.x = (zb)[1].x + (hx);	(vb)[5].cb_tc.y = (zb)[1].y + (hy);					\
-	}															\
-																\
-	if (!use_zb || (cbp) & (crm))												\
-	{															\
-		(vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[0].cr_tc.y = (mby) * (unity) + (ofsy);		\
-		(vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
-		(vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[2].cr_tc.y = (mby) * (unity) + (ofsy);		\
-		(vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[3].cr_tc.y = (mby) * (unity) + (ofsy);		\
-		(vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx);		(vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
-		(vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx);	(vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy);	\
-	}															\
-	else															\
-	{															\
-		(vb)[0].cr_tc.x = (zb)[2].x;		(vb)[0].cr_tc.y = (zb)[2].y;						\
-		(vb)[1].cr_tc.x = (zb)[2].x;		(vb)[1].cr_tc.y = (zb)[2].y + (hy);					\
-		(vb)[2].cr_tc.x = (zb)[2].x + (hx);	(vb)[2].cr_tc.y = (zb)[2].y;						\
-		(vb)[3].cr_tc.x = (zb)[2].x + (hx);	(vb)[3].cr_tc.y = (zb)[2].y;						\
-		(vb)[4].cr_tc.x = (zb)[2].x;		(vb)[4].cr_tc.y = (zb)[2].y + (hy);					\
-		(vb)[5].cr_tc.x = (zb)[2].x + (hx);	(vb)[5].cr_tc.y = (zb)[2].y + (hy);					\
-	}															\
-	} while (0)
+/*
+ * Write the six vertices (two triangles) covering one block of a macroblock.
+ *
+ * vb             - the six consecutive vertices to fill
+ * cbp            - CBP bits of the macroblock, tested against the masks below
+ * mbx, mby       - macroblock coordinates, scaled by unit
+ * unit           - scale applied to mbx/mby
+ * half           - extent added to the block origin to reach the far corners
+ * offset         - added to the scaled macroblock position to get the block origin
+ * luma_mask, cb_mask, cr_mask - cbp bits selecting each channel's data
+ * use_zeroblocks - when true, a channel with clear cbp bits is anchored at its zero block
+ * zero_blocks    - zero block origins (0 = luma, 1 = Cb, 2 = Cr); read only if use_zeroblocks
+ */
+static void
+gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
+                const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
+                unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
+                bool use_zeroblocks, struct vertex2f *zero_blocks)
+{
+   struct vertex2f origin, tc;
+
+   assert(vb);
+   assert(unit && half && offset);
+   assert(!use_zeroblocks || zero_blocks);
+
+   origin.x = mbx * unit->x + offset->x;
+   origin.y = mby * unit->y + offset->y;
+
+   /* Positions; the texcoord quads below list their corners in this same order */
+   vb[0].pos.x = origin.x;
+   vb[1].pos.x = origin.x;
+   vb[2].pos.x = origin.x + half->x;
+   vb[3].pos.x = origin.x + half->x;
+   vb[4].pos.x = origin.x;
+   vb[5].pos.x = origin.x + half->x;
+   vb[0].pos.y = origin.y;
+   vb[1].pos.y = origin.y + half->y;
+   vb[2].pos.y = origin.y;
+   vb[3].pos.y = origin.y;
+   vb[4].pos.y = origin.y + half->y;
+   vb[5].pos.y = origin.y + half->y;
+
+   /* Luma texcoords: the block's own area, or zero block 0 if no luma data is flagged */
+   if (use_zeroblocks && !(cbp & luma_mask))
+      tc = zero_blocks[0];
+   else
+      tc = origin;
+
+   vb[0].luma_tc.x = tc.x;
+   vb[1].luma_tc.x = tc.x;
+   vb[2].luma_tc.x = tc.x + half->x;
+   vb[3].luma_tc.x = tc.x + half->x;
+   vb[4].luma_tc.x = tc.x;
+   vb[5].luma_tc.x = tc.x + half->x;
+   vb[0].luma_tc.y = tc.y;
+   vb[1].luma_tc.y = tc.y + half->y;
+   vb[2].luma_tc.y = tc.y;
+   vb[3].luma_tc.y = tc.y;
+   vb[4].luma_tc.y = tc.y + half->y;
+   vb[5].luma_tc.y = tc.y + half->y;
+
+   /* Cb texcoords: the block's own area, or zero block 1 if no Cb data is flagged */
+   if (use_zeroblocks && !(cbp & cb_mask))
+      tc = zero_blocks[1];
+   else
+      tc = origin;
+
+   vb[0].cb_tc.x = tc.x;
+   vb[1].cb_tc.x = tc.x;
+   vb[2].cb_tc.x = tc.x + half->x;
+   vb[3].cb_tc.x = tc.x + half->x;
+   vb[4].cb_tc.x = tc.x;
+   vb[5].cb_tc.x = tc.x + half->x;
+   vb[0].cb_tc.y = tc.y;
+   vb[1].cb_tc.y = tc.y + half->y;
+   vb[2].cb_tc.y = tc.y;
+   vb[3].cb_tc.y = tc.y;
+   vb[4].cb_tc.y = tc.y + half->y;
+   vb[5].cb_tc.y = tc.y + half->y;
+
+   /* Cr texcoords: the block's own area, or zero block 2 if no Cr data is flagged */
+   if (use_zeroblocks && !(cbp & cr_mask))
+      tc = zero_blocks[2];
+   else
+      tc = origin;
+
+   vb[0].cr_tc.x = tc.x;
+   vb[1].cr_tc.x = tc.x;
+   vb[2].cr_tc.x = tc.x + half->x;
+   vb[3].cr_tc.x = tc.x + half->x;
+   vb[4].cr_tc.x = tc.x;
+   vb[5].cr_tc.x = tc.x + half->x;
+   vb[0].cr_tc.y = tc.y;
+   vb[1].cr_tc.y = tc.y + half->y;
+   vb[2].cr_tc.y = tc.y;
+   vb[3].cr_tc.y = tc.y;
+   vb[4].cr_tc.y = tc.y + half->y;
+   vb[5].cr_tc.y = tc.y + half->y;
+}
 
 static void
 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
@@ -857,25 +884,34 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
             r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
             r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
          };
+         const struct vertex2f offsets[2][2] =
+         {
+            {
+               {0, 0}, {0, half.y}
+            },
+            {
+               {half.x, 0}, {half.x, half.y}
+            }
+         };
          const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
 
          struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
 
-         SET_BLOCK(vb, mb->cbp, mb->mbx, mb->mby,
-                   unit.x, unit.y, 0, 0, half.x, half.y,
-                   32, 2, 1, use_zb, r->zero_block);
+         gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
+                         &unit, &half, &offsets[0][0],
+                         32, 2, 1, use_zb, r->zero_block);
 
-         SET_BLOCK(vb + 6, mb->cbp, mb->mbx, mb->mby,
-                   unit.x, unit.y, half.x, 0, half.x, half.y,
-                   16, 2, 1, use_zb, r->zero_block);
+         gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
+                         &unit, &half, &offsets[1][0],
+                         16, 2, 1, use_zb, r->zero_block);
 
-         SET_BLOCK(vb + 12, mb->cbp, mb->mbx, mb->mby,
-                   unit.x, unit.y, 0, half.y, half.x, half.y,
-                   8, 2, 1, use_zb, r->zero_block);
+         gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
+                         &unit, &half, &offsets[0][1],
+                         8, 2, 1, use_zb, r->zero_block);
 
-         SET_BLOCK(vb + 18, mb->cbp, mb->mbx, mb->mby,
-                   unit.x, unit.y, half.x, half.y, half.x, half.y,
-                   4, 2, 1, use_zb, r->zero_block);
+         gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
+                         &unit, &half, &offsets[1][1],
+                         4, 2, 1, use_zb, r->zero_block);
 
          break;
       }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 85d8c5808d5..a222af1c214 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -50,6 +50,11 @@ enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK
    VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE /* Needs conditional texel fetch! */
 };
 
+struct vertex2f /* pair of floats (x, y); shared so the renderer struct can name the type */
+{
+   float x, y;
+};
+
 struct vl_mpeg12_mc_renderer
 {
    struct pipe_context *pipe;
@@ -93,8 +98,8 @@ struct vl_mpeg12_mc_renderer
    struct pipe_mpeg12_macroblock *macroblock_buf;
    struct pipe_transfer *tex_transfer[3];
    short *texels[3];
-   struct { float x, y; } surface_tex_inv_size;
-   struct { float x, y; } zero_block[3];
+   struct vertex2f surface_tex_inv_size;
+   struct vertex2f zero_block[3];
 };
 
 bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
-- 
cgit v1.2.3


From 334676ed9a185bc0836d2ffe070e21210fac1bde Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 14 Nov 2009 14:00:52 -0500
Subject: nouveau: Link with softpipe.

---
 src/gallium/winsys/drm/nouveau/dri/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/winsys/drm/nouveau/dri/Makefile b/src/gallium/winsys/drm/nouveau/dri/Makefile
index 0937f68c34b..61abc439d31 100644
--- a/src/gallium/winsys/drm/nouveau/dri/Makefile
+++ b/src/gallium/winsys/drm/nouveau/dri/Makefile
@@ -12,6 +12,7 @@ PIPE_DRIVERS = \
 	$(TOP)/src/gallium/drivers/nv30/libnv30.a \
 	$(TOP)/src/gallium/drivers/nv40/libnv40.a \
 	$(TOP)/src/gallium/drivers/nv50/libnv50.a \
+	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
 	$(TOP)/src/gallium/drivers/nouveau/libnouveau.a
 
 DRIVER_SOURCES =
-- 
cgit v1.2.3


From 5f730690f8166c94ee010605b6437a6fb2a7771d Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 22 Nov 2009 16:40:15 -0500
Subject: g3dvl: Basic subpicture support.

RGB subpictures only at the moment.
---
 src/gallium/auxiliary/vl/vl_compositor.c           | 349 ++++++++++++---------
 src/gallium/auxiliary/vl/vl_compositor.h           |  41 ++-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c   |   5 -
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h   |   6 +-
 src/gallium/auxiliary/vl/vl_types.h                |  41 +++
 src/gallium/state_trackers/xorg/xvmc/subpicture.c  |  85 ++++-
 .../state_trackers/xorg/xvmc/tests/.gitignore      |   1 +
 .../state_trackers/xorg/xvmc/tests/Makefile        |   7 +-
 .../xorg/xvmc/tests/test_subpicture.c              | 182 +++++++++++
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   8 +
 10 files changed, 542 insertions(+), 183 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_types.h
 create mode 100644 src/gallium/state_trackers/xorg/xvmc/tests/test_subpicture.c

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 810c7759596..529c0b6e364 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include "vl_compositor.h"
@@ -33,16 +33,6 @@
 #include <util/u_memory.h>
 #include "vl_csc.h"
 
-struct vertex2f
-{
-   float x, y;
-};
-
-struct vertex4f
-{
-   float x, y, z, w;
-};
-
 struct vertex_shader_consts
 {
    struct vertex4f dst_scale;
@@ -56,51 +46,45 @@ struct fragment_shader_consts
    float matrix[16];
 };
 
-/*
- * Represents 2 triangles in a strip in normalized coords.
- * Used to render the surface onto the frame buffer.
- */
-static const struct vertex2f surface_verts[4] =
+static bool
+u_video_rects_equal(struct pipe_video_rect *a, struct pipe_video_rect *b)
 {
-   {0.0f, 0.0f},
-   {0.0f, 1.0f},
-   {1.0f, 0.0f},
-   {1.0f, 1.0f}
-};
+   assert(a && b);
 
-/*
- * Represents texcoords for the above. We can use the position values directly.
- * TODO: Duplicate these in the shader, no need to create a buffer.
- */
-static const struct vertex2f *surface_texcoords = surface_verts;
+   if (a->x != b->x)
+      return false;
+   if (a->y != b->y)
+      return false;
+   if (a->w != b->w)
+      return false;
+   if (a->h != b->h)
+      return false;
+
+   return true;
+}
 
 static bool
 create_vert_shader(struct vl_compositor *c)
 {
    struct ureg_program *shader;
    struct ureg_src vpos, vtex;
-   struct ureg_src vpos_scale, vpos_trans, vtex_scale, vtex_trans;
    struct ureg_dst o_vpos, o_vtex;
-   
+
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
       return false;
 
    vpos = ureg_DECL_vs_input(shader, 0);
    vtex = ureg_DECL_vs_input(shader, 1);
-   vpos_scale = ureg_DECL_constant(shader, 0);
-   vpos_trans = ureg_DECL_constant(shader, 1);
-   vtex_scale = ureg_DECL_constant(shader, 2);
-   vtex_trans = ureg_DECL_constant(shader, 3);
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
    o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1);
 
    /*
-    * o_vpos = vpos * vpos_scale + vpos_trans
-    * o_vtex = vtex * vtex_scale + vtex_trans
+    * o_vpos = vpos
+    * o_vtex = vtex
     */
-   ureg_MAD(shader, o_vpos, vpos, vpos_scale, vpos_trans);
-   ureg_MAD(shader, o_vtex, vtex, vtex_scale, vtex_trans);
+   ureg_MOV(shader, o_vpos, vpos);
+   ureg_MOV(shader, o_vtex, vtex);
 
    ureg_END(shader);
 
@@ -121,7 +105,7 @@ create_frag_shader(struct vl_compositor *c)
    struct ureg_dst texel;
    struct ureg_dst fragment;
    unsigned i;
-   
+
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return false;
@@ -177,14 +161,14 @@ init_pipe_state(struct vl_compositor *c)
    /*sampler.border_color[i] = ;*/
    /*sampler.max_anisotropy = ;*/
    c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
-	
+
    return true;
 }
 
 static void cleanup_pipe_state(struct vl_compositor *c)
 {
    assert(c);
-	
+
    c->pipe->delete_sampler_state(c->pipe, c->sampler);
 }
 
@@ -202,7 +186,7 @@ init_shaders(struct vl_compositor *c)
 static void cleanup_shaders(struct vl_compositor *c)
 {
    assert(c);
-	
+
    c->pipe->delete_vs_state(c->pipe, c->vertex_shader);
    c->pipe->delete_fs_state(c->pipe, c->fragment_shader);
 }
@@ -213,79 +197,30 @@ init_buffers(struct vl_compositor *c)
    struct fragment_shader_consts fsc;
 
    assert(c);
-	
+
    /*
-    * Create our vertex buffer and vertex buffer element
-    * VB contains 4 vertices that render a quad covering the entire window
-    * to display a rendered surface
-    * Quad is rendered as a tri strip
+    * Create our vertex buffer and vertex buffer elements
     */
-   c->vertex_bufs[0].stride = sizeof(struct vertex2f);
-   c->vertex_bufs[0].max_index = 3;
-   c->vertex_bufs[0].buffer_offset = 0;
-   c->vertex_bufs[0].buffer = pipe_buffer_create
+   c->vertex_buf.stride = sizeof(struct vertex4f);
+   c->vertex_buf.max_index = (VL_COMPOSITOR_MAX_LAYERS + 2) * 6 - 1;
+   c->vertex_buf.buffer_offset = 0;
+   c->vertex_buf.buffer = pipe_buffer_create
    (
       c->pipe->screen,
       1,
       PIPE_BUFFER_USAGE_VERTEX,
-      sizeof(struct vertex2f) * 4
-   );
-
-   memcpy
-   (
-      pipe_buffer_map(c->pipe->screen, c->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-      surface_verts,
-      sizeof(struct vertex2f) * 4
+      sizeof(struct vertex4f) * (VL_COMPOSITOR_MAX_LAYERS + 2) * 6
    );
 
-   pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[0].buffer);
-
    c->vertex_elems[0].src_offset = 0;
    c->vertex_elems[0].vertex_buffer_index = 0;
    c->vertex_elems[0].nr_components = 2;
    c->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /*
-    * Create our texcoord buffer and texcoord buffer element
-    * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices
-    */
-   c->vertex_bufs[1].stride = sizeof(struct vertex2f);
-   c->vertex_bufs[1].max_index = 3;
-   c->vertex_bufs[1].buffer_offset = 0;
-   c->vertex_bufs[1].buffer = pipe_buffer_create
-   (
-      c->pipe->screen,
-      1,
-      PIPE_BUFFER_USAGE_VERTEX,
-      sizeof(struct vertex2f) * 4
-   );
-
-   memcpy
-   (
-      pipe_buffer_map(c->pipe->screen, c->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
-      surface_texcoords,
-      sizeof(struct vertex2f) * 4
-   );
-
-   pipe_buffer_unmap(c->pipe->screen, c->vertex_bufs[1].buffer);
-
-   c->vertex_elems[1].src_offset = 0;
-   c->vertex_elems[1].vertex_buffer_index = 1;
+   c->vertex_elems[1].src_offset = sizeof(struct vertex2f);
+   c->vertex_elems[1].vertex_buffer_index = 0;
    c->vertex_elems[1].nr_components = 2;
    c->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /*
-    * Create our vertex shader's constant buffer
-    * Const buffer contains scaling and translation vectors
-    */
-   c->vs_const_buf.buffer = pipe_buffer_create
-   (
-      c->pipe->screen,
-      1,
-      PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD,
-      sizeof(struct vertex_shader_consts)
-   );
-
    /*
     * Create our fragment shader's constant buffer
     * Const buffer contains the color conversion matrix and bias vectors
@@ -308,19 +243,16 @@ init_buffers(struct vl_compositor *c)
 static void
 cleanup_buffers(struct vl_compositor *c)
 {
-   unsigned i;
-
    assert(c);
-	
-   for (i = 0; i < 2; ++i)
-      pipe_buffer_reference(&c->vertex_bufs[i].buffer, NULL);
 
-   pipe_buffer_reference(&c->vs_const_buf.buffer, NULL);
+   pipe_buffer_reference(&c->vertex_buf.buffer, NULL);
    pipe_buffer_reference(&c->fs_const_buf.buffer, NULL);
 }
 
 bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
 {
+   unsigned i;
+
    assert(compositor);
 
    memset(compositor, 0, sizeof(struct vl_compositor));
@@ -339,21 +271,161 @@ bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *p
       return false;
    }
 
+   compositor->fb_state.width = 0;
+   compositor->fb_state.height = 0;
+   compositor->bg = NULL;
+   compositor->dirty_bg = false;
+   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
+      compositor->layers[i] = NULL;
+   compositor->dirty_layers = 0;
+
    return true;
 }
 
 void vl_compositor_cleanup(struct vl_compositor *compositor)
 {
    assert(compositor);
-	
+
    cleanup_buffers(compositor);
    cleanup_shaders(compositor);
    cleanup_pipe_state(compositor);
 }
 
+void vl_compositor_set_background(struct vl_compositor *compositor,
+                                 struct pipe_texture *bg, struct pipe_video_rect *bg_src_rect)
+{
+   assert(compositor);
+   assert((bg && bg_src_rect) || (!bg && !bg_src_rect));
+   /* Set the background, or clear it when bg and bg_src_rect are both NULL (rect is then never read) */
+   if (compositor->bg != bg ||
+       (bg_src_rect && !u_video_rects_equal(&compositor->bg_src_rect, bg_src_rect))) {
+      pipe_texture_reference(&compositor->bg, bg);
+      if (bg_src_rect)
+         compositor->bg_src_rect = *bg_src_rect;
+      compositor->dirty_bg = bg != NULL; /* a cleared background has no vertices to regenerate */
+   }
+}
+
+void vl_compositor_set_layers(struct vl_compositor *compositor,
+                              struct pipe_texture *layers[],
+                              struct pipe_video_rect *src_rects[],
+                              struct pipe_video_rect *dst_rects[],
+                              unsigned num_layers)
+{
+   unsigned i;
+
+   assert(compositor);
+   assert(num_layers <= VL_COMPOSITOR_MAX_LAYERS);
+
+   /* Replace the layer set: slots that are NULL or at/after num_layers are emptied */
+   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
+   {
+      struct pipe_texture *layer = i < num_layers ? layers[i] : NULL;
+      assert(i >= num_layers || (layer && src_rects[i] && dst_rects[i]) ||
+             (!layer && !src_rects[i] && !dst_rects[i]));
+      if (!layer) {
+         /* No texture, so no rects to read and nothing that should stay marked dirty */
+         pipe_texture_reference(&compositor->layers[i], NULL);
+         compositor->dirty_layers &= ~(1u << i);
+      }
+      else if (compositor->layers[i] != layer ||
+               !u_video_rects_equal(&compositor->layer_src_rects[i], src_rects[i]) ||
+               !u_video_rects_equal(&compositor->layer_dst_rects[i], dst_rects[i])) {
+         pipe_texture_reference(&compositor->layers[i], layer);
+         compositor->layer_src_rects[i] = *src_rects[i];
+         compositor->layer_dst_rects[i] = *dst_rects[i];
+         compositor->dirty_layers |= 1u << i;
+      }
+   }
+}
+
+static void gen_rect_verts(unsigned pos,
+                           struct pipe_video_rect *src_rect,
+                           struct vertex2f *src_inv_size,
+                           struct pipe_video_rect *dst_rect,
+                           struct vertex2f *dst_inv_size,
+                           struct vertex4f *vb)
+{
+   /* Emits the 6 vertices of rect number pos into vb. Each vertex carries the
+      destination position scaled by dst_inv_size in xy and the source texcoord
+      scaled by src_inv_size in zw; the destination defaults to the 0..1 range */
+   float dx0 = 0.0f, dy0 = 0.0f, dx1 = 1.0f, dy1 = 1.0f;
+   float sx0, sy0, sx1, sy1;
+   struct vertex4f *v;
+
+   assert(pos < VL_COMPOSITOR_MAX_LAYERS + 2);
+   assert(src_rect);
+   assert(src_inv_size);
+   assert((dst_rect && dst_inv_size) || (!dst_rect && !dst_inv_size));
+   assert(vb);
+
+   /* dst_rect may be NULL (gen_verts passes the background that way). Rather than
+      dereference it, keep the defaults above, which span the whole normalized target */
+   if (dst_rect && dst_inv_size) {
+      dx0 = dst_rect->x * dst_inv_size->x;
+      dy0 = dst_rect->y * dst_inv_size->y;
+      dx1 = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
+      dy1 = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
+   }
+
+   sx0 = src_rect->x * src_inv_size->x;
+   sy0 = src_rect->y * src_inv_size->y;
+   sx1 = (src_rect->x + src_rect->w) * src_inv_size->x;
+   sy1 = (src_rect->y + src_rect->h) * src_inv_size->y;
+
+   /* Two triangles, (0, 1, 2) and (3, 4, 5); vertices 3 and 4 repeat 2 and 1 */
+   v = vb + pos * 6;
+   v[0].x = dx0; v[0].y = dy0; v[0].z = sx0; v[0].w = sy0;
+   v[1].x = dx0; v[1].y = dy1; v[1].z = sx0; v[1].w = sy1;
+   v[2].x = dx1; v[2].y = dy0; v[2].z = sx1; v[2].w = sy0;
+   v[3].x = dx1; v[3].y = dy0; v[3].z = sx1; v[3].w = sy0;
+   v[4].x = dx0; v[4].y = dy1; v[4].z = sx0; v[4].w = sy1;
+   v[5].x = dx1; v[5].y = dy1; v[5].z = sx1; v[5].w = sy1;
+}
+
+static unsigned gen_verts(struct vl_compositor *c,
+                          struct pipe_video_rect *src_rect,
+                          struct vertex2f *src_inv_size,
+                          struct pipe_video_rect *dst_rect)
+{
+   void *vb;
+   unsigned num_rects = 0;
+   unsigned i;
+
+   assert(c);
+   assert(src_rect);
+   assert(src_inv_size);
+   assert(dst_rect);
+
+   /* Fill the vertex buffer with one quad per rect; returns the rect count (0 if mapping fails) */
+   vb = pipe_buffer_map(c->pipe->screen, c->vertex_buf.buffer,
+                        PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD);
+   if (!vb)
+      return 0;
+
+   /* A dirty flag may be set while its texture is NULL, so check before reading sizes */
+   if (c->dirty_bg && c->bg) {
+      struct vertex2f bg_inv_size = {1.0f / c->bg->width[0], 1.0f / c->bg->height[0]};
+      gen_rect_verts(num_rects++, &c->bg_src_rect, &bg_inv_size, NULL, NULL, vb);
+   }
+   c->dirty_bg = false;
+
+   gen_rect_verts(num_rects++, src_rect, src_inv_size, dst_rect, &c->fb_inv_size, vb);
+
+   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS && c->dirty_layers; i++) {
+      if ((c->dirty_layers & (1u << i)) && c->layers[i]) {
+         struct vertex2f layer_inv_size = {1.0f / c->layers[i]->width[0], 1.0f / c->layers[i]->height[0]};
+         gen_rect_verts(num_rects++, &c->layer_src_rects[i], &layer_inv_size,
+                        &c->layer_dst_rects[i], &c->fb_inv_size, vb);
+      }
+      c->dirty_layers &= ~(1u << i);
+   }
+
+   pipe_buffer_unmap(c->pipe->screen, c->vertex_buf.buffer);
+   return num_rects;
+}
+
 void vl_compositor_render(struct vl_compositor          *compositor,
-                          /*struct pipe_texture         *backround,
-                          struct pipe_video_rect        *backround_area,*/
                           struct pipe_texture           *src_surface,
                           enum pipe_mpeg12_picture_type picture_type,
                           /*unsigned                    num_past_surfaces,
@@ -363,13 +435,9 @@ void vl_compositor_render(struct vl_compositor          *compositor,
                           struct pipe_video_rect        *src_area,
                           struct pipe_texture           *dst_surface,
                           struct pipe_video_rect        *dst_area,
-                          /*unsigned                      num_layers,
-                          struct pipe_texture           *layers,
-                          struct pipe_video_rect        *layer_src_areas,
-                          struct pipe_video_rect        *layer_dst_areas*/
                           struct pipe_fence_handle      **fence)
 {
-   struct vertex_shader_consts *vs_consts;
+   unsigned num_rects;
 
    assert(compositor);
    assert(src_surface);
@@ -378,8 +446,15 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    assert(dst_area);
    assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
 
-   compositor->fb_state.width = dst_surface->width[0];
-   compositor->fb_state.height = dst_surface->height[0];
+   if (compositor->fb_state.width != dst_surface->width[0]) {
+      compositor->fb_inv_size.x = 1.0f / dst_surface->width[0];
+      compositor->fb_state.width = dst_surface->width[0];
+   }
+   if (compositor->fb_state.height != dst_surface->height[0]) {
+      compositor->fb_inv_size.y = 1.0f / dst_surface->height[0];
+      compositor->fb_state.height = dst_surface->height[0];
+   }
+
    compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface
    (
       compositor->pipe->screen,
@@ -402,39 +477,19 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    compositor->pipe->set_sampler_textures(compositor->pipe, 1, &src_surface);
    compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
    compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
-   compositor->pipe->set_vertex_buffers(compositor->pipe, 2, compositor->vertex_bufs);
+   compositor->pipe->set_vertex_buffers(compositor->pipe, 1, &compositor->vertex_buf);
    compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
-   compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_VERTEX, 0, &compositor->vs_const_buf);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf);
 
-   vs_consts = pipe_buffer_map
-   (
-      compositor->pipe->screen,
-      compositor->vs_const_buf.buffer,
-      PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
-   );
+   {
+      struct vertex2f src_inv_size = {1.0f / src_surface->width[0], 1.0f / src_surface->height[0]};
+      num_rects = gen_verts(compositor, src_area, &src_inv_size, dst_area);
+   }
+
+   assert(!compositor->dirty_bg && !compositor->dirty_layers);
+   assert(num_rects > 0);
 
-   vs_consts->dst_scale.x = dst_area->w / (float)compositor->fb_state.cbufs[0]->width;
-   vs_consts->dst_scale.y = dst_area->h / (float)compositor->fb_state.cbufs[0]->height;
-   vs_consts->dst_scale.z = 1;
-   vs_consts->dst_scale.w = 1;
-   vs_consts->dst_trans.x = dst_area->x / (float)compositor->fb_state.cbufs[0]->width;
-   vs_consts->dst_trans.y = dst_area->y / (float)compositor->fb_state.cbufs[0]->height;
-   vs_consts->dst_trans.z = 0;
-   vs_consts->dst_trans.w = 0;
-
-   vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0];
-   vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0];
-   vs_consts->src_scale.z = 1;
-   vs_consts->src_scale.w = 1;
-   vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0];
-   vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0];
-   vs_consts->src_trans.z = 0;
-   vs_consts->src_trans.w = 0;
-
-   pipe_buffer_unmap(compositor->pipe->screen, compositor->vs_const_buf.buffer);
-
-   compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4);
+   compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLES, 0, num_rects * 6);
    compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence);
 
    pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL);
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 17e2afd3539..86f8343659e 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #ifndef vl_compositor_h
@@ -31,31 +31,50 @@
 #include <pipe/p_compiler.h>
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
+#include "vl_types.h"
 
 struct pipe_context;
 struct pipe_texture;
 
+#define VL_COMPOSITOR_MAX_LAYERS 16
+
 struct vl_compositor
 {
    struct pipe_context *pipe;
 
    struct pipe_framebuffer_state fb_state;
+   struct vertex2f fb_inv_size; /* NOTE(review): presumably 1/width, 1/height of the bound framebuffer -- confirm in vl_compositor.c */
    void *sampler;
    void *vertex_shader;
    void *fragment_shader;
    struct pipe_viewport_state viewport;
-   struct pipe_vertex_buffer vertex_bufs[2];
+   struct pipe_vertex_buffer vertex_buf; /* single vertex buffer; replaces the former two-entry vertex_bufs[] */
    struct pipe_vertex_element vertex_elems[2];
-   struct pipe_constant_buffer vs_const_buf, fs_const_buf;
+   struct pipe_constant_buffer fs_const_buf; /* the vertex-shader constant buffer no longer exists */
+
+   struct pipe_texture *bg; /* NOTE(review): presumably installed by vl_compositor_set_background() -- confirm */
+   struct pipe_video_rect bg_src_rect;
+   bool dirty_bg; /* asserted to be false right before the compositor issues its draw */
+   struct pipe_texture *layers[VL_COMPOSITOR_MAX_LAYERS]; /* the three layer arrays share the same fixed capacity */
+   struct pipe_video_rect layer_src_rects[VL_COMPOSITOR_MAX_LAYERS];
+   struct pipe_video_rect layer_dst_rects[VL_COMPOSITOR_MAX_LAYERS];
+   unsigned dirty_layers; /* asserted to be zero before drawing; NOTE(review): looks like a per-layer bitmask -- confirm */
 };
 
 bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe);
 
 void vl_compositor_cleanup(struct vl_compositor *compositor);
 
+void vl_compositor_set_background(struct vl_compositor *compositor,
+                                  struct pipe_texture *bg, struct pipe_video_rect *bg_src_rect);
+
+void vl_compositor_set_layers(struct vl_compositor *compositor,
+                              struct pipe_texture *layers[],
+                              struct pipe_video_rect *src_rects[],
+                              struct pipe_video_rect *dst_rects[],
+                              unsigned num_layers);
+
 void vl_compositor_render(struct vl_compositor          *compositor,
-                          /*struct pipe_texture         *backround,
-                          struct pipe_video_rect        *backround_area,*/
                           struct pipe_texture           *src_surface,
                           enum pipe_mpeg12_picture_type picture_type,
                           /*unsigned                    num_past_surfaces,
@@ -65,10 +84,6 @@ void vl_compositor_render(struct vl_compositor          *compositor,
                           struct pipe_video_rect        *src_area,
                           struct pipe_texture           *dst_surface,
                           struct pipe_video_rect        *dst_area,
-                          /*unsigned                      num_layers,
-                          struct pipe_texture           *layers,
-                          struct pipe_video_rect        *layer_src_areas,
-                          struct pipe_video_rect        *layer_dst_areas,*/
                           struct pipe_fence_handle      **fence);
 
 void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index cedda8b1d4a..fd924aa23ff 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -42,11 +42,6 @@
 #define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
 #define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
 
-struct vertex4f
-{
-   float x, y, z, w;
-};
-
 struct vertex_shader_consts
 {
    struct vertex4f denorm;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index a222af1c214..609c83b63fe 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -31,6 +31,7 @@
 #include <pipe/p_compiler.h>
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
+#include "vl_types.h"
 
 struct pipe_context;
 struct pipe_video_surface;
@@ -50,11 +51,6 @@ enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK
    VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE /* Needs conditional texel fetch! */
 };
 
-struct vertex2f
-{
-   float x, y;
-};
-
 struct vl_mpeg12_mc_renderer
 {
    struct pipe_context *pipe;
diff --git a/src/gallium/auxiliary/vl/vl_types.h b/src/gallium/auxiliary/vl/vl_types.h
new file mode 100644
index 00000000000..ce175546894
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_types.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_types_h
+#define vl_types_h
+
+struct vertex2f
+{
+   float x, y; /* two-component float vector, shared by the compositor and MC renderer headers */
+};
+
+struct vertex4f
+{
+   float x, y, z, w; /* four-component float vector; moved here from vl_mpeg12_mc_renderer.c */
+};
+
+#endif /* vl_types_h */
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 69898d5fcd3..0e299466a5c 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,7 +10,7 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
@@ -22,28 +22,78 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include <assert.h>
 #include <X11/Xlibint.h>
 #include <X11/extensions/XvMClib.h>
+#include <vl_winsys.h>
+#include <pipe/p_screen.h>
+#include <pipe/p_video_context.h>
+#include <pipe/p_state.h>
+#include <util/u_memory.h>
+#include <util/u_math.h>
+#include "xvmc_private.h"
+
+#define FOURCC_RGB 0x0000003
 
 Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *subpicture,
                             unsigned short width, unsigned short height, int xvimage_id)
 {
+   XvMCContextPrivate *context_priv;
+   XvMCSubPicturePrivate *subpicture_priv;
+   struct pipe_video_context *vpipe;
+   struct pipe_texture template;
+   struct pipe_texture *tex;
+
    assert(dpy);
 
    if (!context)
       return XvMCBadContext;
 
-   assert(subpicture);
+   context_priv = context->privData;
+   vpipe = context_priv->vctx->vpipe;
+
+   if (!subpicture)
+      return XvMCBadSubpicture;
 
-   /*if (width > || height > )
-      return BadValue;*/
+   if (width > 2048 || height > 2048)
+      return BadValue;
+
+   if (xvimage_id != FOURCC_RGB)
+      return BadMatch;
 
-   /*if (xvimage_id != )
-      return BadMatch;*/
+   subpicture_priv = CALLOC(1, sizeof(XvMCSubPicturePrivate));
+   if (!subpicture_priv)
+      return BadAlloc;
+
+   memset(&template, 0, sizeof(struct pipe_texture));
+   template.target = PIPE_TEXTURE_2D;
+   template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+   template.last_level = 0;
+   if (vpipe->screen->get_param(vpipe->screen, PIPE_CAP_NPOT_TEXTURES)) {
+      template.width[0] = width;
+      template.height[0] = height;
+   }
+   else {
+      template.width[0] = util_next_power_of_two(width);
+      template.height[0] = util_next_power_of_two(height);
+   }
+   template.depth[0] = 1;
+   pf_get_block(template.format, &template.block);
+   template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+   subpicture_priv->context = context;
+   tex = vpipe->screen->texture_create(vpipe->screen, &template);
+   subpicture_priv->sfc = vpipe->screen->get_tex_surface(vpipe->screen, tex, 0, 0, 0,
+                                                         PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+   pipe_texture_reference(&tex, NULL);
+   if (!subpicture_priv->sfc)
+   {
+      FREE(subpicture_priv);
+      return BadAlloc;
+   }
 
    subpicture->subpicture_id = XAllocID(dpy);
    subpicture->context_id = context->context_id;
@@ -56,7 +106,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    subpicture->component_order[1] = 0;
    subpicture->component_order[2] = 0;
    subpicture->component_order[3] = 0;
-   /* TODO: subpicture->privData = ;*/
+   subpicture->privData = subpicture_priv;
 
    SyncHandle();
 
@@ -66,12 +116,19 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
 Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, short y,
                            unsigned short width, unsigned short height, unsigned int color)
 {
+   XvMCSubPicturePrivate *subpicture_priv;
+   XvMCContextPrivate *context_priv;
    assert(dpy);
 
    if (!subpicture)
       return XvMCBadSubpicture;
 
+   subpicture_priv = subpicture->privData; /* not NULL-checked: assumes the subpicture came from XvMCCreateSubpicture */
+   context_priv = subpicture_priv->context->privData; /* the call carries no context, so use the one stored in the private data */
    /* TODO: Assert clear rect is within bounds? Or clip? */
+   context_priv->vctx->vpipe->surface_fill(context_priv->vctx->vpipe,
+                                           subpicture_priv->sfc, x, y,
+                                           width, height, color); /* rectangle and color are forwarded as given, unclipped */
 
    return Success;
 }
@@ -97,12 +154,18 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
 
 Status XvMCDestroySubpicture(Display *dpy, XvMCSubpicture *subpicture)
 {
+   XvMCSubPicturePrivate *subpicture_priv;
+
    assert(dpy);
 
    if (!subpicture)
       return XvMCBadSubpicture;
 
-   return BadImplementation;
+   subpicture_priv = subpicture->privData; /* not NULL-checked before use */
+   pipe_surface_reference(&subpicture_priv->sfc, NULL); /* let go of the surface held in the private data */
+   FREE(subpicture_priv); /* subpicture->privData itself is not reset and now dangles */
+
+   return Success;
 }
 
 Status XvMCSetSubpicturePalette(Display *dpy, XvMCSubpicture *subpicture, unsigned char *palette)
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/.gitignore b/src/gallium/state_trackers/xorg/xvmc/tests/.gitignore
index e1d2f9023df..9a8e05d9472 100644
--- a/src/gallium/state_trackers/xorg/xvmc/tests/.gitignore
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/.gitignore
@@ -1,5 +1,6 @@
 test_context
 test_surface
+test_subpicture
 test_blocks
 test_rendering
 xvmc_bench
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/Makefile b/src/gallium/state_trackers/xorg/xvmc/tests/Makefile
index c875dd76058..88b03763563 100644
--- a/src/gallium/state_trackers/xorg/xvmc/tests/Makefile
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/Makefile
@@ -7,7 +7,7 @@ LIBS = -lXvMCW -lXvMC -lXv -lX11
 
 .PHONY: default clean
 
-default: test_context test_surface test_blocks test_rendering xvmc_bench
+default: test_context test_surface test_subpicture test_blocks test_rendering xvmc_bench
 
 test_context: test_context.o testlib.o
 	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
@@ -15,6 +15,9 @@ test_context: test_context.o testlib.o
 test_surface: test_surface.o testlib.o
 	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
 
+test_subpicture: test_subpicture.o testlib.o
+	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
+
 test_blocks: test_blocks.o testlib.o
 	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
 
@@ -25,4 +28,4 @@ xvmc_bench: xvmc_bench.o testlib.o
 	$(CC) $(LDFLAGS) -o $@ $^ $(LIBS)
 
 clean:
-	$(RM) -rf *.o test_context test_surface test_blocks test_rendering xvmc_bench
+	$(RM) -rf *.o test_context test_surface test_subpicture test_blocks test_rendering xvmc_bench
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_subpicture.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_subpicture.c
new file mode 100644
index 00000000000..20d0907a07f
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_subpicture.c
@@ -0,0 +1,182 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+#include <error.h>
+#include <stdio.h>
+#include "testlib.h"
+
+static void PrintGUID(const char *guid)
+{
+	int i;	/* reads guid[0..15]: bytes 0-3 are shown as characters, bytes 4-15 as hex */
+	printf("\tguid: ");
+	for (i = 0; i < 4; ++i)	/* %c, not %C: these are narrow chars, not wint_t */
+		printf("%c,", guid[i] == 0 ? '0' : guid[i]);
+	for (; i < 15; ++i)	/* go through unsigned char so bytes >= 0x80 are not sign-extended */
+		printf("%x,", (unsigned int)(unsigned char)guid[i]);
+	printf("%x\n", (unsigned int)(unsigned char)guid[15]);
+}
+
+static void PrintComponentOrder(const char *co)
+{
+	int i;	/* reads co[0..31]: bytes 0-3 are shown as characters, bytes 4-31 as hex */
+	printf("\tcomponent_order:\n\t   ");
+	for (i = 0; i < 4; ++i)	/* %c, not %C: these are narrow chars, not wint_t */
+		printf("%c,", co[i] == 0 ? '0' : co[i]);
+	for (; i < 31; ++i)	/* go through unsigned char so bytes >= 0x80 are not sign-extended */
+		printf("%x,", (unsigned int)(unsigned char)co[i]);
+	printf("%x\n", (unsigned int)(unsigned char)co[31]);
+}
+
+int main(int argc, char **argv)
+{
+	const unsigned int	width = 16, height = 16;
+	const unsigned int	mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+	const unsigned int	subpic_width = 16, subpic_height = 16;
+
+	Display			*display;
+	XvPortID		port_num;
+	int			surface_type_id;
+	unsigned int		is_overlay, intra_unsigned;
+	int			colorkey;
+	XvMCContext		context;
+	XvImageFormatValues	*subpics;
+	int			num_subpics;
+	XvMCSubpicture		subpicture = {0};
+	int			i;
+	/* Lists the port's subpicture types, then checks create/destroy incl. bad-argument paths. */
+	display = XOpenDisplay(NULL);
+	assert(display);	/* XOpenDisplay() returns NULL when no X server can be reached */
+	if (!GetPort
+	(
+		display,
+		width,
+		height,
+		XVMC_CHROMA_FORMAT_420,
+		mc_types,
+		2,
+		&port_num,
+		&surface_type_id,
+		&is_overlay,
+		&intra_unsigned
+	))
+	{
+		XCloseDisplay(display);
+		error(1, 0, "Error, unable to find a good port.\n");
+	}
+
+	if (is_overlay)
+	{
+		Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+		XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+	}
+
+	assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, XVMC_DIRECT, &context) == Success);
+
+	subpics = XvMCListSubpictureTypes(display, port_num, surface_type_id, &num_subpics);
+	assert((subpics && num_subpics > 0) || (!subpics && num_subpics == 0));	/* list and count must agree */
+
+	for (i = 0; i < num_subpics; ++i)
+	{
+		printf("Subpicture %d:\n", i);
+		printf("\tid: 0x%08x\n", subpics[i].id);
+		printf("\ttype: %s\n", subpics[i].type == XvRGB ? "XvRGB" : (subpics[i].type == XvYUV ? "XvYUV" : "Unknown"));
+		printf("\tbyte_order: %s\n", subpics[i].byte_order == LSBFirst ? "LSB First" : (subpics[i].byte_order == MSBFirst ? "MSB First" : "Unknown"));
+		PrintGUID(subpics[i].guid);
+		printf("\tbpp: %u\n", subpics[i].bits_per_pixel);
+		printf("\tformat: %s\n", subpics[i].format == XvPacked ? "XvPacked" : (subpics[i].format == XvPlanar ? "XvPlanar" : "Unknown"));
+		printf("\tnum_planes: %u\n", subpics[i].num_planes);
+
+		if (subpics[i].type == XvRGB)
+		{
+			printf("\tdepth: %u\n", subpics[i].depth);
+			printf("\tred_mask: 0x%08x\n", subpics[i].red_mask);
+			printf("\tgreen_mask: 0x%08x\n", subpics[i].green_mask);
+			printf("\tblue_mask: 0x%08x\n", subpics[i].blue_mask);
+		}
+		else if (subpics[i].type == XvYUV)
+		{
+			printf("\ty_sample_bits: %u\n", subpics[i].y_sample_bits);
+			printf("\tu_sample_bits: %u\n", subpics[i].u_sample_bits);
+			printf("\tv_sample_bits: %u\n", subpics[i].v_sample_bits);
+			printf("\thorz_y_period: %u\n", subpics[i].horz_y_period);
+			printf("\thorz_u_period: %u\n", subpics[i].horz_u_period);
+			printf("\thorz_v_period: %u\n", subpics[i].horz_v_period);
+			printf("\tvert_y_period: %u\n", subpics[i].vert_y_period);
+			printf("\tvert_u_period: %u\n", subpics[i].vert_u_period);
+			printf("\tvert_v_period: %u\n", subpics[i].vert_v_period);
+		}
+		PrintComponentOrder(subpics[i].component_order);
+		printf("\tscanline_order: %s\n", subpics[i].scanline_order == XvTopToBottom ? "XvTopToBottom" : (subpics[i].scanline_order == XvBottomToTop ? "XvBottomToTop" : "Unknown"));
+	}
+
+	if (num_subpics == 0)
+	{
+		printf("Subpictures not supported, nothing to test.\n");
+		return 0;
+	}
+
+	/* Test NULL context */
+	assert(XvMCCreateSubpicture(display, NULL, &subpicture, subpic_width, subpic_height, subpics[0].id) == XvMCBadContext);
+	/* Test NULL subpicture */
+	assert(XvMCCreateSubpicture(display, &context, NULL, subpic_width, subpic_height, subpics[0].id) == XvMCBadSubpicture);
+	/* Test invalid xvimage id */
+	assert(XvMCCreateSubpicture(display, &context, &subpicture, subpic_width, subpic_height, -1) == BadMatch);
+	/* Test huge width */
+	assert(XvMCCreateSubpicture(display, &context, &subpicture, 16384, subpic_height, subpics[0].id) == BadValue);
+	/* Test huge height */
+	assert(XvMCCreateSubpicture(display, &context, &subpicture, subpic_width, 16384, subpics[0].id) == BadValue);
+	/* Test huge width & height */
+	assert(XvMCCreateSubpicture(display, &context, &subpicture, 16384, 16384, subpics[0].id) == BadValue);
+	for (i = 0; i < num_subpics; ++i)
+	{
+		/* Test valid params */
+		assert(XvMCCreateSubpicture(display, &context, &subpicture, subpic_width, subpic_height, subpics[i].id) == Success);
+		/* Test subpicture id assigned */
+		assert(subpicture.subpicture_id != 0);
+		/* Test context id assigned and correct */
+		assert(subpicture.context_id == context.context_id);
+		/* Test subpicture type id assigned and correct */
+		assert(subpicture.xvimage_id == subpics[i].id);
+		/* Test width & height assigned and correct (against the requested subpicture size) */
+		assert(subpicture.width == subpic_width && subpicture.height == subpic_height);
+		/* Test no palette support */
+		assert(subpicture.num_palette_entries == 0 && subpicture.entry_bytes == 0);
+		/* Test valid params */
+		assert(XvMCDestroySubpicture(display, &subpicture) == Success);
+	}
+	/* Test NULL subpicture */
+	assert(XvMCDestroySubpicture(display, NULL) == XvMCBadSubpicture);
+
+	assert(XvMCDestroyContext(display, &context) == Success);
+
+	XFree(subpics);
+	XvUngrabPort(display, port_num, CurrentTime);
+	XCloseDisplay(display);
+
+	return 0;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 5fb994db740..6e1b86304ba 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -55,4 +55,12 @@ typedef struct
 	XvMCContext *context;
 } XvMCSurfacePrivate;
 
+typedef struct
+{
+	struct pipe_surface *sfc; /* surface filled by XvMCClearSubpicture and released by XvMCDestroySubpicture */
+	/* Some XvMC functions take a subpicture but not a context,
+	   so we keep track of which context each subpicture belongs to. */
+	XvMCContext *context;
+} XvMCSubPicturePrivate;
+
 #endif /* xvmc_private_h */
-- 
cgit v1.2.3


From 9e8ab2e7c1b8ff5279d4247b8690c9bfc57f7e02 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 5 Dec 2009 12:53:42 -0500
Subject: g3dvl: It's ok to not have cliprects (minimized windows, etc).

---
 src/gallium/winsys/g3dvl/dri/dri_winsys.c | 34 ++++++++++++++++---------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index 257aa0a1201..da54c0976f6 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -238,27 +238,29 @@ vl_dri_flush_frontbuffer(struct pipe_screen *screen,
 
    vl_dri_update_drawables_locked(vl_dri_ctx);
 
-   src_bbox.x1 = 0;
-   src_bbox.x2 = vl_dri_ctx->drawable->w;
-   src_bbox.y1 = 0;
-   src_bbox.y2 = vl_dri_ctx->drawable->h;
+   if (vl_dri_ctx->drawable->cliprects) {
+      src_bbox.x1 = 0;
+      src_bbox.x2 = vl_dri_ctx->drawable->w;
+      src_bbox.y1 = 0;
+      src_bbox.y2 = vl_dri_ctx->drawable->h;
 
 #if 0
-   if (vl_dri_scrn->_api_hooks->present_locked)
-      vl_dri_scrn->api_hooks->present_locked(pipe, surf,
-                                             vl_dri_ctx->drawable->cliprects,
-                                             vl_dri_ctx->drawable->num_cliprects,
-                                             vl_dri_ctx->drawable->x, vl_dri_drawable->y,
-                                             &bbox, NULL /*fence*/);
-   else
+      if (vl_dri_scrn->_api_hooks->present_locked)
+         vl_dri_scrn->api_hooks->present_locked(pipe, surf,
+                                                vl_dri_ctx->drawable->cliprects,
+                                                vl_dri_ctx->drawable->num_cliprects,
+                                                vl_dri_ctx->drawable->x, vl_dri_drawable->y,
+                                                &bbox, NULL /*fence*/);
+      else
 #endif
-   if (vl_dri_scrn->api_hooks->front_srf_locked) {
-      struct pipe_surface *front = vl_dri_scrn->api_hooks->front_srf_locked(screen);
+      if (vl_dri_scrn->api_hooks->front_srf_locked) {
+         struct pipe_surface *front = vl_dri_scrn->api_hooks->front_srf_locked(screen);
 
-      if (front)
-         vl_clip_copy(vl_dri_ctx, front, surf, &src_bbox);
+         if (front)
+            vl_clip_copy(vl_dri_ctx, front, surf, &src_bbox);
 
-      //st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, fence);
+         //st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, fence);
+      }
    }
 
    vl_dri_ctx->lost_lock = save_lost_lock;
-- 
cgit v1.2.3


From 6783672a974d73b9900f596fc71fb9c17d2c2ecc Mon Sep 17 00:00:00 2001
From: Cooper Yuan <cooperyuan@gmail.com>
Date: Fri, 29 Jan 2010 21:42:09 +0800
Subject: r300g/g3dvl: port xvmc video stuff to mesa/pipe-video branch

---
 src/gallium/drivers/r300/r300_video_context.c   | 329 ++++++++++++++++++++++++
 src/gallium/drivers/r300/r300_video_context.h   |  40 +++
 src/gallium/winsys/drm/radeon/core/radeon_drm.c |  19 ++
 src/gallium/winsys/g3dvl/drm/radeon/Makefile    |  19 ++
 4 files changed, 407 insertions(+)
 create mode 100644 src/gallium/drivers/r300/r300_video_context.c
 create mode 100644 src/gallium/drivers/r300/r300_video_context.h
 create mode 100644 src/gallium/winsys/g3dvl/drm/radeon/Makefile

diff --git a/src/gallium/drivers/r300/r300_video_context.c b/src/gallium/drivers/r300/r300_video_context.c
new file mode 100644
index 00000000000..622f1b8820b
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_video_context.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright (C) 2009-2010  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+#include <pipe/p_defines.h>
+#include <pipe/p_context.h>
+#include <pipe/p_screen.h>
+#include <pipe/p_inlines.h>
+#include <util/u_memory.h>
+#include <X11/Xlib.h>
+
+#include <fcntl.h>
+
+#include "radeon_buffer.h"
+#include "radeon_r300.h"
+#include "r300_screen.h"
+#include "r300_texture.h"
+#include "p_video_context.h"
+#include "radeon_vl.h"
+#include "softpipe/sp_winsys.h"
+#include "softpipe/sp_texture.h"
+
+#include "r300_video_context.h"
+#include <softpipe/sp_video_context.h>
+
+static void r300_mpeg12_destroy(struct pipe_video_context *vpipe)
+{
+    struct radeon_mpeg12_context *mpeg12 = (struct radeon_mpeg12_context*)vpipe;
+    struct pipe_context *pipe;
+
+    assert(vpipe);
+    pipe = mpeg12->pipe;
+    /* Unbind both shaders, then delete the three state objects built by init_pipe_state. */
+    pipe->bind_vs_state(pipe, NULL);
+    pipe->bind_fs_state(pipe, NULL);
+    pipe->delete_blend_state(pipe, mpeg12->blend);
+    pipe->delete_rasterizer_state(pipe, mpeg12->rast);
+    pipe->delete_depth_stencil_alpha_state(pipe, mpeg12->dsa);
+    /* Let go of the decode target and helpers before the pipe context they run on. */
+    pipe_video_surface_reference(&mpeg12->decode_target, NULL);
+    vl_compositor_cleanup(&mpeg12->compositor);
+    vl_mpeg12_mc_renderer_cleanup(&mpeg12->mc_renderer);
+    pipe->destroy(pipe);
+    FREE(mpeg12);
+}
+
+static void
+r300_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
+                               struct pipe_video_surface *past,
+                               struct pipe_video_surface *future,
+                               unsigned num_macroblocks,
+                               struct pipe_macroblock *macroblocks,
+                               struct pipe_fence_handle **fence)
+{
+    struct radeon_mpeg12_context *mpeg12 = (struct radeon_mpeg12_context*)vpipe;
+    struct pipe_texture *past_tex = NULL, *future_tex = NULL;
+
+    assert(vpipe);
+    assert(num_macroblocks);
+    assert(macroblocks);
+    assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
+    assert(mpeg12->decode_target);
+    /* Either reference frame may be absent; an absent one is handed on as NULL. */
+    if (past)
+        past_tex = r300_video_surface(past)->tex;
+    if (future)
+        future_tex = r300_video_surface(future)->tex;
+    vl_mpeg12_mc_renderer_render_macroblocks(&mpeg12->mc_renderer,
+        r300_video_surface(mpeg12->decode_target)->tex, past_tex, future_tex,
+        num_macroblocks, (struct pipe_mpeg12_macroblock*)macroblocks, fence);
+}
+
+static void r300_mpeg12_clear_surface(struct pipe_video_context *vpipe,
+                                      unsigned x, unsigned y,
+                                      unsigned width, unsigned height,
+                                      unsigned value,
+                                      struct pipe_surface *surface)
+{
+    struct radeon_mpeg12_context *mpeg12 = (struct radeon_mpeg12_context*)vpipe;
+
+    assert(vpipe);
+    assert(surface);
+    /* Use the pipe context's own fill hook when it has one, else the util fallback. */
+    if (!mpeg12->pipe->surface_fill)
+        util_surface_fill(mpeg12->pipe, surface, x, y, width, height, value);
+    else
+        mpeg12->pipe->surface_fill(mpeg12->pipe, surface, x, y, width, height, value);
+}
+
+static void
+r300_mpeg12_render_picture(struct pipe_video_context     *vpipe,
+                           struct pipe_video_surface     *src_surface,
+                           enum pipe_mpeg12_picture_type picture_type,
+                           struct pipe_video_rect        *src_area,
+                           struct pipe_surface           *dst_surface,
+                           struct pipe_video_rect        *dst_area,
+                           struct pipe_fence_handle      **fence)
+{
+    struct radeon_mpeg12_context *mpeg12 = (struct radeon_mpeg12_context*)vpipe;
+
+    assert(vpipe);
+    assert(src_surface);
+    assert(src_area);
+    assert(dst_surface);
+    assert(dst_area);
+
+    /* The compositor takes textures, so both surfaces are unwrapped at the call. */
+    vl_compositor_render(&mpeg12->compositor, r300_video_surface(src_surface)->tex,
+                         picture_type, src_area,
+                         dst_surface->texture, dst_area, fence);
+}
+
+static void r300_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
+                                          struct pipe_video_surface *dt)
+{
+    struct radeon_mpeg12_context *mpeg12 = (struct radeon_mpeg12_context*)vpipe;
+
+    assert(vpipe);
+    assert(dt);
+    /* Swap the tracked decode target through the video-surface reference helper. */
+    pipe_video_surface_reference(&mpeg12->decode_target, dt);
+}
+
+static void r300_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe,
+                                       const float *mat)
+{
+    struct radeon_mpeg12_context *mpeg12 = (struct radeon_mpeg12_context*)vpipe;
+
+    assert(vpipe);
+    /* The matrix is passed straight through to this context's compositor. */
+    vl_compositor_set_csc_matrix(&mpeg12->compositor, mat);
+}
+
+static bool r300_mpeg12_init_pipe_state(struct radeon_mpeg12_context *ctx)
+{
+    struct pipe_rasterizer_state rast = {0};
+    struct pipe_blend_state blend = {0};
+    struct pipe_depth_stencil_alpha_state dsa = {0};
+    unsigned i;
+    /* Creates and binds the fixed rasterizer, blend and depth/stencil/alpha state objects. */
+    assert(ctx);
+
+    rast.flatshade = 1;
+    rast.flatshade_first = 0;
+    rast.light_twoside = 0;
+    rast.front_winding = PIPE_WINDING_CCW;
+    rast.cull_mode = PIPE_WINDING_CW;
+    rast.fill_cw = PIPE_POLYGON_MODE_FILL;
+    rast.fill_ccw = PIPE_POLYGON_MODE_FILL;
+    rast.offset_cw = 0;
+    rast.offset_ccw = 0;
+    rast.scissor = 0;
+    rast.poly_smooth = 0;
+    rast.poly_stipple_enable = 0;
+    rast.point_sprite = 0;
+    rast.point_size_per_vertex = 0;
+    rast.multisample = 0;
+    rast.line_smooth = 0;
+    rast.line_stipple_enable = 0;
+    rast.line_stipple_factor = 0;
+    rast.line_stipple_pattern = 0;
+    rast.line_last_pixel = 0;
+    rast.bypass_vs_clip_and_viewport = 0;
+    rast.line_width = 1;
+    rast.point_smooth = 0;
+    rast.point_size = 1;
+    rast.offset_units = 1;
+    rast.offset_scale = 1;
+    /* sprite_coord_mode[] and any field not set above stay zero from the initializer. */
+    ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
+    ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
+
+    blend.blend_enable = 0;
+    blend.rgb_func = PIPE_BLEND_ADD;
+    blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+    blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+    blend.alpha_func = PIPE_BLEND_ADD;
+    blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+    blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+    blend.logicop_enable = 0;
+    blend.logicop_func = PIPE_LOGICOP_CLEAR;
+    /* Needed to allow color writes to FB, even if blending disabled */
+    blend.colormask = PIPE_MASK_RGBA;
+    blend.dither = 0;
+    ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
+    ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
+
+    dsa.depth.enabled = 0;
+    dsa.depth.writemask = 0;
+    dsa.depth.func = PIPE_FUNC_ALWAYS;
+    for (i = 0; i < 2; ++i)
+    {
+        dsa.stencil[i].enabled = 0;
+        dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
+        dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
+        dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
+        dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
+        dsa.stencil[i].ref_value = 0;
+        dsa.stencil[i].valuemask = 0;
+        dsa.stencil[i].writemask = 0;
+    }
+    dsa.alpha.enabled = 0;
+    dsa.alpha.func = PIPE_FUNC_ALWAYS;
+    dsa.alpha.ref_value = 0;
+    ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa);
+    ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
+
+    return ctx->rast && ctx->blend && ctx->dsa; /* false if any state object could not be created */
+}
+
+static struct pipe_video_context *
+r300_mpeg12_context_create(struct pipe_screen *screen,
+                           enum pipe_video_profile profile,
+                           enum pipe_video_chroma_format chroma_format,
+                           unsigned int width,
+                           unsigned int height)
+{
+    /* Bring-up order: pipe, MC renderer, compositor, state; failures unwind. */
+    struct radeon_mpeg12_context *ctx = CALLOC_STRUCT(radeon_mpeg12_context);
+    if (!ctx)
+        return NULL;
+
+    ctx->base.profile       = profile;
+    ctx->base.chroma_format = chroma_format;
+    ctx->base.width         = width;
+    ctx->base.height        = height;
+    ctx->base.screen        = screen;
+    /* NOTE(review): confirm these four names; siblings here are r300_mpeg12_*. */
+    ctx->base.destroy               = radeon_mpeg12_destroy;
+    ctx->base.decode_macroblocks    = radeon_mpeg12_decode_macroblocks;
+    ctx->base.clear_surface         = radeon_mpeg12_clear_surface;
+    ctx->base.render_picture        = radeon_mpeg12_render_picture;
+    ctx->base.set_decode_target     = r300_mpeg12_set_decode_target;
+    ctx->base.set_csc_matrix        = r300_mpeg12_set_csc_matrix;
+
+    ctx->pipe = r300_create_context(screen,(struct r300_winsys*)screen->winsys);
+    if (!ctx->pipe)
+    {
+        FREE(ctx);
+        return NULL;
+    }
+
+    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
+                                   width, height, chroma_format,
+                                   VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
+                                   VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
+                                   true))
+    {
+        ctx->pipe->destroy(ctx->pipe);
+        FREE(ctx);
+        return NULL;
+    }
+
+    if (!vl_compositor_init(&ctx->compositor, ctx->pipe))
+    {
+        vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+        ctx->pipe->destroy(ctx->pipe);
+        FREE(ctx);
+        return NULL;
+    }
+
+    if (!r300_mpeg12_init_pipe_state(ctx))
+    {
+        vl_compositor_cleanup(&ctx->compositor);
+        vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+        ctx->pipe->destroy(ctx->pipe);
+        FREE(ctx);
+        return NULL;
+    }
+
+    return &ctx->base;
+}
+
+struct pipe_video_context *
+r300_video_create(struct pipe_context *pipe, enum pipe_video_profile profile,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height,
+                  unsigned pvctx_id)
+{
+    struct pipe_video_context *vpipe = NULL;
+    struct radeon_vl_context *rvl_ctx;
+
+    assert(pipe && pipe->screen);
+    assert(width && height);
+    /* PIPE_VIDEO_CODEC_MPEG12 is the only codec handled; others yield NULL. */
+    if (u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12)
+        vpipe = r300_mpeg12_context_create(pipe->screen, profile,
+                                           chroma_format, width, height);
+    if (!vpipe)
+        return NULL;
+
+    /* Private radeon_vl_context, attached to the video context via priv. */
+    rvl_ctx = calloc(1, sizeof(*rvl_ctx));
+    if (!rvl_ctx)
+    {
+        vpipe->destroy(vpipe);
+        return NULL;
+    }
+    /* NOTE(review): display/screen are undeclared here; source them properly. */
+    rvl_ctx->display = display;
+    rvl_ctx->screen = screen;
+    vpipe->priv = rvl_ctx;
+
+    return vpipe;
+}
diff --git a/src/gallium/drivers/r300/r300_video_context.h b/src/gallium/drivers/r300/r300_video_context.h
new file mode 100644
index 00000000000..a8210ba7b71
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_video_context.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2009-2010  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#ifndef __R300_VIDEO_CONTEXT_H__
+#define __R300_VIDEO_CONTEXT_H__
+
+#include <pipe/p_video_context.h>
+
+struct pipe_context;
+
+struct pipe_video_context*
+r300_video_create(struct pipe_context *pipe, enum pipe_video_profile profile,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height,
+                  unsigned pvctx_id);
+
+#endif
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
index 52419725337..cbbdcf2651d 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
@@ -130,6 +130,24 @@ struct pipe_context* radeon_create_context(struct drm_api* api,
     }
 }
 
+/* create_video_context hook. NOTE(review): success path never returns pvctx. */
+static struct pipe_video_context *
+radeon_create_video_context(struct drm_api *api, struct pipe_screen *pscreen,
+                            enum pipe_video_profile profile,
+                            enum pipe_video_chroma_format chroma_format,
+                            unsigned width, unsigned height)
+{
+    struct radeon_winsys *winsys = (struct radeon_winsys*)pscreen->winsys;
+    struct pipe_context *pipe;
+    struct pipe_video_context pvctx;
+
+    pipe = radeon_create_context(api, pscreen);
+    if (!pipe)
+        return NULL;
+
+    pvctx = r300_video_create(pipe, profile, chroma_format, width, height, i);
+}
+
 boolean radeon_buffer_from_texture(struct drm_api* api,
                                    struct pipe_texture* texture,
                                    struct pipe_buffer** buffer,
@@ -249,6 +267,7 @@ static boolean radeon_local_handle_from_texture(struct drm_api *api,
 struct drm_api drm_api_hooks = {
     .create_screen = radeon_create_screen,
     .create_context = radeon_create_context,
+    .create_video_context = radeon_create_video_context,
     .texture_from_shared_handle = radeon_texture_from_shared_handle,
     .shared_handle_from_texture = radeon_shared_handle_from_texture,
     .local_handle_from_texture = radeon_local_handle_from_texture,
diff --git a/src/gallium/winsys/g3dvl/drm/radeon/Makefile b/src/gallium/winsys/g3dvl/drm/radeon/Makefile
new file mode 100644
index 00000000000..6768119c2f4
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/drm/radeon/Makefile
@@ -0,0 +1,19 @@
+# Builds libXvMCg3dvl.so (DRM/DRI based) for radeon: sets the per-driver
+# variables below, then includes ../Makefile.template.
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+C_SOURCES =
+
+DRIVER_INCLUDES = $(shell pkg-config libdrm libdrm_radeon --cflags-only-I) \
+                   -I$(TOP)/src/gallium/winsys/drm/nouveau \
+DRIVER_DEFINES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-other)
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/winsys/drm/radeon/drm/libradeondrm.a \
+	$(TOP)/src/gallium/drivers/radeon/libradeon.a \
+
+DRIVER_LIB_DEPS += $(shell pkg-config libdrm_radeon --libs)
+
+include ../Makefile.template
-- 
cgit v1.2.3


From 4d65133e8691e51a17aa896d5bb40022cfae5a62 Mon Sep 17 00:00:00 2001
From: Cooper Yuan <cooperyuan@gmail.com>
Date: Mon, 1 Feb 2010 17:53:46 +0800
Subject: r300g/g3dvl: Fix build error and correct Makefile for xvmc lib

---
 src/gallium/winsys/drm/radeon/core/radeon_drm.c | 5 +++--
 src/gallium/winsys/g3dvl/drm/radeon/Makefile    | 9 +++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
index cbbdcf2651d..e2b451dc329 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
@@ -30,6 +30,7 @@
  */
 
 #include "radeon_drm.h"
+#include "r300_video_context.h"
 
 /* Helper function to do the ioctls needed for setup and init. */
 static void do_ioctls(int fd, struct radeon_winsys* winsys)
@@ -139,13 +140,13 @@ radeon_create_video_context(struct drm_api *api, struct pipe_screen *pscreen,
 {
     struct radeon_winsys *winsys = (struct radeon_winsys*)pscreen->winsys;
     struct pipe_context *pipe;
-    struct pipe_video_context pvctx;
+    struct pipe_video_context *pvctx;
 
     pipe = radeon_create_context(api, pscreen);
     if (!pipe)
         return NULL;
 
-    pvctx = r300_video_create(pipe, profile, chroma_format, width, height, i);
+    pvctx = r300_video_create(pipe, profile, chroma_format, width, height, 0);
 }
 
 boolean radeon_buffer_from_texture(struct drm_api* api,
diff --git a/src/gallium/winsys/g3dvl/drm/radeon/Makefile b/src/gallium/winsys/g3dvl/drm/radeon/Makefile
index 6768119c2f4..0f7fd1c15ad 100644
--- a/src/gallium/winsys/g3dvl/drm/radeon/Makefile
+++ b/src/gallium/winsys/g3dvl/drm/radeon/Makefile
@@ -7,12 +7,13 @@ include $(TOP)/configs/current
 C_SOURCES =
 
 DRIVER_INCLUDES = $(shell pkg-config libdrm libdrm_radeon --cflags-only-I) \
-                   -I$(TOP)/src/gallium/winsys/drm/nouveau \
-DRIVER_DEFINES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-other)
+                   -I$(TOP)/src/gallium/winsys/drm/radeon
+DRIVER_DEFINES = $(shell pkg-config libdrm libdrm_radeon --cflags-only-other)
 
 PIPE_DRIVERS = \
-	$(TOP)/src/gallium/winsys/drm/radeon/drm/libradeondrm.a \
-	$(TOP)/src/gallium/drivers/radeon/libradeon.a \
+	$(TOP)/src/gallium/winsys/drm/radeon/core/libradeonwinsys.a \
+	$(TOP)/src/gallium/drivers/r300/libr300.a \
+        $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a
 
 DRIVER_LIB_DEPS += $(shell pkg-config libdrm_radeon --libs)
 
-- 
cgit v1.2.3


From 40cd082afa42c86e320f73389f3d0836587f97d9 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Fri, 5 Mar 2010 23:11:05 -0500
Subject: vl: Add switches to autoconf.

Pass 'xorg/xvmc' to --with-state-trackers to get the XvMC state tracker.
Pass --enable-gallium-g3dvl to enable the winsys.
---
 configure.ac | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/configure.ac b/configure.ac
index f9476a46dda..bc2d5c17012 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1241,6 +1241,22 @@ if test "x$enable_gallium_nouveau" = xyes; then
     GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nv04 nv10 nv20 nv30 nv40 nv50"
 fi
 
+dnl
+dnl Gallium G3DVL configuration (adds g3dvl/$dir per GALLIUM_WINSYS_DIRS entry)
+dnl
+AC_ARG_ENABLE([gallium-g3dvl],
+    [AS_HELP_STRING([--enable-gallium-g3dvl],
+        [build gallium g3dvl @<:@default=disabled@:>@])],
+    [enable_gallium_g3dvl="$enableval"],
+    [enable_gallium_g3dvl=no])
+if test "x$enable_gallium_g3dvl" = xyes; then
+    vl_winsys_dirs=""
+    for dir in $GALLIUM_WINSYS_DIRS; do
+        vl_winsys_dirs="$vl_winsys_dirs g3dvl/$dir"
+    done
+    GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS $vl_winsys_dirs"
+fi
+
 
 dnl Restore LDFLAGS and CPPFLAGS
 LDFLAGS="$_SAVE_LDFLAGS"
-- 
cgit v1.2.3


From 8580b7a0eeed3fc29320b2c0a184084e4267661a Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Fri, 5 Mar 2010 23:14:49 -0500
Subject: vl: Add some basic debug output for XvMC.

Set the XVMC_DEBUG env var to:
	0 for no extra output
	1 for error output
	2 for warning output
	3 for tracing output
---
 src/gallium/state_trackers/xorg/xvmc/context.c     | 47 ++++++++++++++++++----
 src/gallium/state_trackers/xorg/xvmc/subpicture.c  |  9 +++++
 src/gallium/state_trackers/xorg/xvmc/surface.c     | 32 +++++++++++++--
 .../state_trackers/xorg/xvmc/xvmc_private.h        | 23 +++++++++++
 4 files changed, 100 insertions(+), 11 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index 56003618ada..d56d61c79c6 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -34,7 +34,6 @@
 #include <pipe/p_state.h>
 #include <vl_winsys.h>
 #include <util/u_memory.h>
-#include <util/u_debug.h>
 #include <vl/vl_csc.h>
 #include "xvmc_private.h"
 
@@ -90,6 +89,15 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id,
                *mc_type = surface_info[l].mc_type;
                *surface_flags = surface_info[l].flags;
                *screen = i;
+
+               XVMC_MSG(XVMC_TRACE, "[XvMC] Found suitable context surface format.\n" \
+                                    "[XvMC]   screen=%u, port=%u\n" \
+                                    "[XvMC]   id: 0x%08X\n" \
+                                    "[XvMC]   max width=%u, max height=%u\n" \
+                                    "[XvMC]   chroma format=0x%08X\n" \
+                                    "[XvMC]   acceleration level=0x%08X\n" \
+                                    "[XvMC]   flags=0x%08X\n",
+                                    i, port, surface_type_id, max_width, max_height, *chroma_format, *mc_type, *surface_flags);
             }
 
             XFree(surface_info);
@@ -99,14 +107,23 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id,
       XvFreeAdaptorInfo(adaptor_info);
    }
 
-   if (!*found_port)
+   if (!*found_port) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Could not find a suitable port.\n");
       return XvBadPort;
-   if (!found_surface)
+   }
+   if (!found_surface) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Could not find a suitable surface.\n");
       return BadMatch;
-   if (width > max_width || height > max_height)
+   }
+   if (width > max_width || height > max_height) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Requested context dimensions (w=%u,h=%u) too large (max w=%u,h=%u).\n",
+               width, height, max_width, max_height);
       return BadValue;
-   if (flags != XVMC_DIRECT && flags != 0)
+   }
+   if (flags != XVMC_DIRECT && flags != 0) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Invalid context flags 0x%08X.\n", flags);
       return BadValue;
+   }
 
    return Success;
 }
@@ -124,6 +141,8 @@ static enum pipe_video_profile ProfileToPipe(int xvmc_profile)
 
    assert(0);
 
+   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized profile 0x%08X.\n", xvmc_profile);
+
    return -1;
 }
 
@@ -140,6 +159,8 @@ static enum pipe_video_chroma_format FormatToPipe(int xvmc_format)
          assert(0);
    }
 
+   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized format 0x%08X.\n", xvmc_format);
+
    return -1;
 }
 
@@ -157,6 +178,8 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
    XvMCContextPrivate *context_priv;
    float csc[16];
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Creating context %p.\n", context);
+
    assert(dpy);
 
    if (!context)
@@ -171,15 +194,15 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
 
    /* XXX: Current limits */
    if (chroma_format != XVMC_CHROMA_FORMAT_420) {
-      debug_printf("[XvMCg3dvl] Cannot decode requested surface type. Unsupported chroma format.\n");
+      XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Unsupported chroma format.\n");
       return BadImplementation;
    }
    if (mc_type != (XVMC_MOCOMP | XVMC_MPEG_2)) {
-      debug_printf("[XvMCg3dvl] Cannot decode requested surface type. Non-MPEG2/Mocomp acceleration unsupported.\n");
+      XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Non-MPEG2/Mocomp acceleration unsupported.\n");
       return BadImplementation;
    }
    if (!(surface_flags & XVMC_INTRA_UNSIGNED)) {
-      debug_printf("[XvMCg3dvl] Cannot decode requested surface type. Signed intra unsupported.\n");
+      XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Signed intra unsupported.\n");
       return BadImplementation;
    }
 
@@ -191,6 +214,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
    vscreen = vl_screen_create(dpy, scrn);
 
    if (!vscreen) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL screen.\n");
       FREE(context_priv);
       return BadAlloc;
    }
@@ -199,6 +223,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
                           FormatToPipe(chroma_format), width, height);
 
    if (!vctx) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL context.\n");
       vl_screen_destroy(vscreen);
       FREE(context_priv);
       return BadAlloc;
@@ -225,6 +250,8 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
 
    SyncHandle();
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Context %p created.\n", context);
+
    return Success;
 }
 
@@ -234,6 +261,8 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
    struct vl_context *vctx;
    XvMCContextPrivate *context_priv;
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Destroying context %p.\n", context);
+
    assert(dpy);
 
    if (!context || !context->privData)
@@ -248,5 +277,7 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
    FREE(context_priv);
    context->privData = NULL;
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Context %p destroyed.\n", context);
+
    return Success;
 }
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 7f10f366d21..d64d075f330 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -47,6 +47,8 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    struct pipe_texture template;
    struct pipe_texture *tex;
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Creating subpicture %p.\n", subpicture);
+
    assert(dpy);
 
    if (!context)
@@ -58,6 +60,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    if (!subpicture)
       return XvMCBadSubpicture;
 
+   /* TODO: Check against surface max width, height */
    if (width > 2048 || height > 2048)
       return BadValue;
 
@@ -109,6 +112,8 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
 
    SyncHandle();
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p created.\n", subpicture);
+
    return Success;
 }
 
@@ -155,6 +160,8 @@ Status XvMCDestroySubpicture(Display *dpy, XvMCSubpicture *subpicture)
 {
    XvMCSubPicturePrivate *subpicture_priv;
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Destroying subpicture %p.\n", subpicture);
+
    assert(dpy);
 
    if (!subpicture)
@@ -164,6 +171,8 @@ Status XvMCDestroySubpicture(Display *dpy, XvMCSubpicture *subpicture)
    pipe_surface_reference(&subpicture_priv->sfc, NULL);
    FREE(subpicture_priv);
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p destroyed.\n", subpicture);
+
    return Success;
 }
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index f640b1464f4..79dae3fb8b7 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -48,6 +48,8 @@ static enum pipe_mpeg12_macroblock_type TypeToPipe(int xvmc_mb_type)
 
    assert(0);
 
+   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized mb type 0x%08X.\n", xvmc_mb_type);
+
    return -1;
 }
 
@@ -64,6 +66,8 @@ static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
          assert(0);
    }
 
+   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized picture type 0x%08X.\n", xvmc_pic);
+
    return -1;
 }
 
@@ -71,8 +75,11 @@ static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, int xvmc_
 {
    switch (xvmc_motion_type) {
       case XVMC_PREDICTION_FRAME:
-         return xvmc_dct_type == XVMC_DCT_TYPE_FIELD ?
-            PIPE_MPEG12_MOTION_TYPE_16x8 : PIPE_MPEG12_MOTION_TYPE_FRAME;
+         if (xvmc_dct_type == XVMC_DCT_TYPE_FIELD)
+            return PIPE_MPEG12_MOTION_TYPE_16x8;
+         else if (xvmc_dct_type == XVMC_DCT_TYPE_FRAME)
+            return PIPE_MPEG12_MOTION_TYPE_FRAME;
+         break;
       case XVMC_PREDICTION_FIELD:
          return PIPE_MPEG12_MOTION_TYPE_FIELD;
       case XVMC_PREDICTION_DUAL_PRIME:
@@ -81,6 +88,8 @@ static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, int xvmc_
          assert(0);
    }
 
+   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized motion type 0x%08X (with DCT type 0x%08X).\n", xvmc_motion_type, xvmc_dct_type);
+
    return -1;
 }
 
@@ -183,6 +192,8 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    XvMCSurfacePrivate *surface_priv;
    struct pipe_video_surface *vsfc;
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Creating surface %p.\n", surface);
+
    assert(dpy);
 
    if (!context)
@@ -197,6 +208,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    if (!surface_priv)
       return BadAlloc;
 
+   assert(vpipe->screen->video_surface_create);
    vsfc = vpipe->screen->video_surface_create(vpipe->screen, vpipe->chroma_format,
                                               vpipe->width, vpipe->height);
    if (!vsfc) {
@@ -216,6 +228,8 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
 
    SyncHandle();
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p created.\n", surface);
+
    return Success;
 }
 
@@ -236,6 +250,8 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
    unsigned int i;
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p.\n", target_surface);
+
    assert(dpy);
 
    if (!context || !context->privData)
@@ -288,6 +304,8 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    for (i = 0; i < num_macroblocks; ++i)
       vpipe->screen->buffer_destroy(pipe_macroblocks[i].blocks);
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface);
+
    return Success;
 }
 
@@ -328,6 +346,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    struct pipe_video_rect src_rect = {srcx, srcy, srcw, srch};
    struct pipe_video_rect dst_rect = {destx, desty, destw, desth};
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Displaying surface %p.\n", surface);
+
    assert(dpy);
 
    if (!surface || !surface->privData)
@@ -363,7 +383,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
                          context_priv->backbuffer, &dst_rect, surface_priv->disp_fence);
 
    vl_video_bind_drawable(context_priv->vctx, drawable);
-	
+
    vpipe->screen->flush_frontbuffer
    (
       vpipe->screen,
@@ -371,6 +391,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
       vpipe->priv
    );
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display.\n", surface);
+
    return Success;
 }
 
@@ -392,6 +414,8 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
 {
    XvMCSurfacePrivate *surface_priv;
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Destroying surface %p.\n", surface);
+
    assert(dpy);
 
    if (!surface || !surface->privData)
@@ -402,6 +426,8 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
    FREE(surface_priv);
    surface->privData = NULL;
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p destroyed.\n", surface);
+
    return Success;
 }
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 6e1b86304ba..96fe7a9f5e3 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -30,6 +30,7 @@
 
 #include <X11/Xlib.h>
 #include <X11/extensions/XvMClib.h>
+#include <util/u_debug.h>
 
 #define BLOCK_SIZE_SAMPLES 64
 #define BLOCK_SIZE_BYTES (BLOCK_SIZE_SAMPLES * 2)
@@ -63,4 +64,26 @@ typedef struct
 	XvMCContext *context;
 } XvMCSubPicturePrivate;
 
+#define XVMC_OUT   0
+#define XVMC_ERR   1
+#define XVMC_WARN  2
+#define XVMC_TRACE 3
+static INLINE void XVMC_MSG(unsigned int level, const char *fmt, ...)
+{
+   /* XVMC_DEBUG is read on the first call and the value is reused afterwards. */
+   static boolean level_cached = FALSE;
+   static unsigned int max_level;
+   va_list args;
+
+   if (!level_cached) {
+      max_level = debug_get_num_option("XVMC_DEBUG", 0);
+      level_cached = TRUE;
+   }
+   if (level > max_level)
+      return;   /* message level is above the configured threshold */
+   va_start(args, fmt);
+   _debug_vprintf(fmt, args);
+   va_end(args);
+}
+
 #endif /* xvmc_private_h */
-- 
cgit v1.2.3


From 80468464897682b8e10aeab310f20fdd7ddc6cb4 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Fri, 5 Mar 2010 23:26:03 -0500
Subject: vl: Subpicture/compositing fixes.

---
 src/gallium/auxiliary/vl/vl_compositor.c           | 73 ++++++++++++------
 src/gallium/drivers/softpipe/sp_video_context.c    | 52 +++++++++----
 src/gallium/include/pipe/p_video_context.h         | 10 +++
 src/gallium/state_trackers/xorg/xvmc/subpicture.c  | 88 ++++++++++++++++++++--
 src/gallium/state_trackers/xorg/xvmc/surface.c     | 23 +++++-
 .../state_trackers/xorg/xvmc/xvmc_private.h        | 49 +++++++-----
 6 files changed, 233 insertions(+), 62 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 44b3714dcc3..e6d787b4d74 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -349,7 +349,7 @@ static void gen_rect_verts(unsigned pos,
    assert(pos < VL_COMPOSITOR_MAX_LAYERS + 2);
    assert(src_rect);
    assert(src_inv_size);
-   assert((dst_rect && dst_inv_size) || (!dst_rect && !dst_inv_size));
+   assert((dst_rect && dst_inv_size) /*|| (!dst_rect && !dst_inv_size)*/);
    assert(vb);
 
    vb[pos * 6 + 0].x = dst_rect->x * dst_inv_size->x;
@@ -383,39 +383,52 @@ static void gen_rect_verts(unsigned pos,
    vb[pos * 6 + 5].w = (src_rect->y + src_rect->h) * src_inv_size->y;
 }
 
-static unsigned gen_verts(struct vl_compositor *c,
-                          struct pipe_video_rect *src_rect,
-                          struct vertex2f *src_inv_size,
-                          struct pipe_video_rect *dst_rect)
+static unsigned gen_data(struct vl_compositor *c,
+                         struct pipe_texture *src_surface,
+                         struct pipe_video_rect *src_rect,
+                         struct pipe_video_rect *dst_rect,
+                         struct pipe_texture **textures)
 {
    void *vb;
    unsigned num_rects = 0;
    unsigned i;
 
    assert(c);
+   assert(src_surface);
    assert(src_rect);
-   assert(src_inv_size);
    assert(dst_rect);
+   assert(textures);
 
    vb = pipe_buffer_map(c->pipe->screen, c->vertex_buf.buffer,
                         PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD);
 
+   if (!vb)
+      return 0;
+
    if (c->dirty_bg) {
       struct vertex2f bg_inv_size = {1.0f / c->bg->width0, 1.0f / c->bg->height0};
-      gen_rect_verts(num_rects++, &c->bg_src_rect, &bg_inv_size, NULL, NULL, vb);
+      gen_rect_verts(num_rects, &c->bg_src_rect, &bg_inv_size, NULL, NULL, vb);
+      textures[num_rects] = c->bg;
+      ++num_rects;
       c->dirty_bg = false;
    }
 
-   gen_rect_verts(num_rects++, src_rect, src_inv_size, dst_rect, &c->fb_inv_size, vb);
-
-   for (i = 0; c->dirty_layers > 0; i++)
    {
+      struct vertex2f src_inv_size = { 1.0f / src_surface->width0, 1.0f / src_surface->height0};
+      gen_rect_verts(num_rects, src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
+      textures[num_rects] = src_surface;
+      ++num_rects;
+   }
+
+   for (i = 0; c->dirty_layers > 0; i++) {
       assert(i < VL_COMPOSITOR_MAX_LAYERS);
 
       if (c->dirty_layers & (1 << i)) {
          struct vertex2f layer_inv_size = {1.0f / c->layers[i]->width0, 1.0f / c->layers[i]->height0};
-         gen_rect_verts(num_rects++, &c->layer_src_rects[i], &layer_inv_size,
+         gen_rect_verts(num_rects, &c->layer_src_rects[i], &layer_inv_size,
                         &c->layer_dst_rects[i], &c->fb_inv_size, vb);
+         textures[num_rects] = c->layers[i];
+         ++num_rects;
          c->dirty_layers &= ~(1 << i);
       }
    }
@@ -425,6 +438,28 @@ static unsigned gen_verts(struct vl_compositor *c,
    return num_rects;
 }
 
+static void draw_layers(struct vl_compositor *c,
+                        struct pipe_texture *src_surface,
+                        struct pipe_video_rect *src_rect,
+                        struct pipe_video_rect *dst_rect)
+{
+   /* Draws every quad produced by gen_data(), one draw call per quad. */
+   struct pipe_texture *quad_tex[VL_COMPOSITOR_MAX_LAYERS + 2];
+   unsigned quad_count, q;
+
+   assert(c);
+   assert(src_surface);
+   assert(src_rect);
+   assert(dst_rect);
+
+   quad_count = gen_data(c, src_surface, src_rect, dst_rect, quad_tex);
+   /* Quad q uses vertices [6q, 6q+6) and samples quad_tex[q]. */
+   for (q = 0; q < quad_count; ++q) {
+      c->pipe->set_fragment_sampler_textures(c->pipe, 1, quad_tex + q);
+      c->pipe->draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, q * 6, 6);
+   }
+}
+
 void vl_compositor_render(struct vl_compositor          *compositor,
                           struct pipe_texture           *src_surface,
                           enum pipe_mpeg12_picture_type picture_type,
@@ -437,8 +472,6 @@ void vl_compositor_render(struct vl_compositor          *compositor,
                           struct pipe_video_rect        *dst_area,
                           struct pipe_fence_handle      **fence)
 {
-   unsigned num_rects;
-
    assert(compositor);
    assert(src_surface);
    assert(src_area);
@@ -459,7 +492,7 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    (
       compositor->pipe->screen,
       dst_surface,
-      0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE
+      0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ_WRITE
    );
 
    compositor->viewport.scale[0] = compositor->fb_state.width;
@@ -474,22 +507,15 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state);
    compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport);
    compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 1, &compositor->sampler);
-   compositor->pipe->set_fragment_sampler_textures(compositor->pipe, 1, &src_surface);
    compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
    compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
    compositor->pipe->set_vertex_buffers(compositor->pipe, 1, &compositor->vertex_buf);
    compositor->pipe->set_vertex_elements(compositor->pipe, 2, compositor->vertex_elems);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, &compositor->fs_const_buf);
 
-   {
-      struct vertex2f src_inv_size = {1.0f / src_surface->width0, 1.0f / src_surface->height0};
-      num_rects = gen_verts(compositor, src_area, &src_inv_size, dst_area);
-   }
+   draw_layers(compositor, src_surface, src_area, dst_area);
 
    assert(!compositor->dirty_bg && !compositor->dirty_layers);
-   assert(num_rects > 0);
-
-   compositor->pipe->draw_arrays(compositor->pipe, PIPE_PRIM_TRIANGLES, 0, num_rects * 6);
    compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence);
 
    pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL);
@@ -501,7 +527,8 @@ void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float
 
    memcpy
    (
-      pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE),
+      pipe_buffer_map(compositor->pipe->screen, compositor->fs_const_buf.buffer,
+                      PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD),
       mat,
       sizeof(struct fragment_shader_consts)
    );
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index f7231dc3f12..f43b3d63ad3 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -39,7 +39,7 @@ sp_mpeg12_destroy(struct pipe_video_context *vpipe)
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
 
    assert(vpipe);
-	
+
    /* Asserted in softpipe_delete_fs_state() for some reason */
    ctx->pipe->bind_vs_state(ctx->pipe, NULL);
    ctx->pipe->bind_fs_state(ctx->pipe, NULL);
@@ -119,8 +119,6 @@ sp_mpeg12_surface_copy(struct pipe_video_context *vpipe,
 
 static void
 sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
-                         /*struct pipe_surface         *backround,
-                         struct pipe_video_rect        *backround_area,*/
                          struct pipe_video_surface     *src_surface,
                          enum pipe_mpeg12_picture_type picture_type,
                          /*unsigned                    num_past_surfaces,
@@ -130,24 +128,50 @@ sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
                          struct pipe_video_rect        *src_area,
                          struct pipe_surface           *dst_surface,
                          struct pipe_video_rect        *dst_area,
-                         /*unsigned                      num_layers,
-                         struct pipe_surface           *layers,
-                         struct pipe_video_rect        *layer_src_areas,
-                         struct pipe_video_rect        *layer_dst_areas*/
                          struct pipe_fence_handle      **fence)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-	
+
    assert(vpipe);
    assert(src_surface);
    assert(src_area);
    assert(dst_surface);
    assert(dst_area);
-	
+
    vl_compositor_render(&ctx->compositor, softpipe_video_surface(src_surface)->tex,
                         picture_type, src_area, dst_surface->texture, dst_area, fence);
 }
 
+static void
+sp_mpeg12_set_picture_background(struct pipe_video_context *vpipe,
+                                  struct pipe_texture *bg,
+                                  struct pipe_video_rect *bg_src_rect)
+{
+   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+   /* Forwards the background texture and its source rect to this context's compositor. */
+   assert(vpipe);
+   assert(bg);
+   assert(bg_src_rect);
+
+   vl_compositor_set_background(&ctx->compositor, bg, bg_src_rect);
+}
+
+static void
+sp_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
+                             struct pipe_texture *layers[],
+                             struct pipe_video_rect *src_rects[],
+                             struct pipe_video_rect *dst_rects[],
+                             unsigned num_layers)
+{
+   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+   /* Forwards the layer textures and their rects to this context's compositor. */
+   assert(vpipe);
+   assert((layers && src_rects && dst_rects) ||
+          (!layers && !src_rects && !dst_rects)); /* all three arrays, or none of them */
+
+   vl_compositor_set_layers(&ctx->compositor, layers, src_rects, dst_rects, num_layers);
+}
+
 static void
 sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
                             struct pipe_video_surface *dt)
@@ -179,7 +203,7 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    unsigned i;
 
    assert(ctx);
-	
+
    rast.flatshade = 1;
    rast.flatshade_first = 0;
    rast.light_twoside = 0;
@@ -244,7 +268,7 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    dsa.alpha.ref_value = 0;
    ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa);
    ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
-	
+
    return true;
 }
 
@@ -276,6 +300,8 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
    ctx->base.render_picture = sp_mpeg12_render_picture;
    ctx->base.surface_fill = sp_mpeg12_surface_fill;
    ctx->base.surface_copy = sp_mpeg12_surface_copy;
+   ctx->base.set_picture_background = sp_mpeg12_set_picture_background;
+   ctx->base.set_picture_layers = sp_mpeg12_set_picture_layers;
    ctx->base.set_decode_target = sp_mpeg12_set_decode_target;
    ctx->base.set_csc_matrix = sp_mpeg12_set_csc_matrix;
 
@@ -288,14 +314,14 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
       FREE(ctx);
       return NULL;
    }
-	
+
    if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) {
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
    }
-	
+
    if (!init_pipe_state(ctx)) {
       vl_compositor_cleanup(&ctx->compositor);
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 9ae595b224e..b3346b219f8 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -109,6 +109,16 @@ struct pipe_video_context
     * Parameter-like states (or properties)
     */
    /*@{*/
+   void (*set_picture_background)(struct pipe_video_context *vpipe,
+                                  struct pipe_texture *bg,
+                                  struct pipe_video_rect *bg_src_rect);
+
+   void (*set_picture_layers)(struct pipe_video_context *vpipe,
+                              struct pipe_texture *layers[],
+                              struct pipe_video_rect *src_rects[],
+                              struct pipe_video_rect *dst_rects[],
+                              unsigned num_layers);
+
    void (*set_picture_desc)(struct pipe_video_context *vpipe,
                             const struct pipe_picture_desc *desc);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index d64d075f330..4f75c73413b 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -42,7 +42,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
                             unsigned short width, unsigned short height, int xvimage_id)
 {
    XvMCContextPrivate *context_priv;
-   XvMCSubPicturePrivate *subpicture_priv;
+   XvMCSubpicturePrivate *subpicture_priv;
    struct pipe_video_context *vpipe;
    struct pipe_texture template;
    struct pipe_texture *tex;
@@ -67,7 +67,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    if (xvimage_id != FOURCC_RGB)
       return BadMatch;
 
-   subpicture_priv = CALLOC(1, sizeof(XvMCSubPicturePrivate));
+   subpicture_priv = CALLOC(1, sizeof(XvMCSubpicturePrivate));
    if (!subpicture_priv)
       return BadAlloc;
 
@@ -84,12 +84,13 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
       template.height0 = util_next_power_of_two(height);
    }
    template.depth0 = 1;
-   template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+   template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    subpicture_priv->context = context;
    tex = vpipe->screen->texture_create(vpipe->screen, &template);
    subpicture_priv->sfc = vpipe->screen->get_tex_surface(vpipe->screen, tex, 0, 0, 0,
-                                                         PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+                                                         PIPE_BUFFER_USAGE_CPU_WRITE |
+                                                         PIPE_BUFFER_USAGE_GPU_READ);
    pipe_texture_reference(&tex, NULL);
    if (!subpicture_priv->sfc)
    {
@@ -120,8 +121,9 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
 Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, short y,
                            unsigned short width, unsigned short height, unsigned int color)
 {
-   XvMCSubPicturePrivate *subpicture_priv;
+   XvMCSubpicturePrivate *subpicture_priv;
    XvMCContextPrivate *context_priv;
+
    assert(dpy);
 
    if (!subpicture)
@@ -141,6 +143,15 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
                                short srcx, short srcy, unsigned short width, unsigned short height,
                                short dstx, short dsty)
 {
+   XvMCSubpicturePrivate *subpicture_priv;
+   XvMCContextPrivate *context_priv;
+   struct pipe_screen *screen;
+   struct pipe_transfer *xfer;
+   unsigned char *src, *dst;
+   unsigned x, y;
+
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Compositing subpicture %p.\n", subpicture);
+
    assert(dpy);
 
    if (!subpicture)
@@ -151,14 +162,56 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
    if (subpicture->xvimage_id != image->id)
       return BadMatch;
 
+   /* No planar support for now */
+   if (image->num_planes != 1)
+      return BadMatch;
+
+   subpicture_priv = subpicture->privData;
+   context_priv = subpicture_priv->context->privData;
+   screen = context_priv->vctx->vpipe->screen;
+
    /* TODO: Assert rects are within bounds? Or clip? */
 
+   xfer = screen->get_tex_transfer(screen, subpicture_priv->sfc->texture, 0, 0, 0,
+                                   PIPE_TRANSFER_WRITE, dstx, dsty, width, height);
+   if (!xfer)
+      return BadAlloc;
+
+   src = image->data;
+   dst = screen->transfer_map(screen, xfer);
+   if (!dst) {
+      screen->tex_transfer_destroy(xfer);
+      return BadAlloc;
+   }
+
+   switch (image->id)
+   {
+      case FOURCC_RGB:
+         assert(subpicture_priv->sfc->format == PIPE_FORMAT_X8R8G8B8_UNORM);
+         src += image->offsets[0] + srcy * image->pitches[0] + srcx * 3; /* start at (srcx, srcy) */
+         /* Rows advance by the image pitch and the transfer stride, not by the rect width. */
+         for (y = 0; y < height; ++y, src += image->pitches[0], dst += xfer->stride) {
+            for (x = 0; x < width; ++x) { /* TODO: Confirm or fix byte order */
+               dst[x * 4 + 0] = src[x * 3 + 0];
+               dst[x * 4 + 1] = src[x * 3 + 1];
+               dst[x * 4 + 2] = src[x * 3 + 2];
+            }
+         }
+         break;
+      default:
+         assert(false);
+   }
+
+   screen->transfer_unmap(screen, xfer);
+   screen->tex_transfer_destroy(xfer);
+
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p composited.\n", subpicture);
+
    return Success;
 }
 
 Status XvMCDestroySubpicture(Display *dpy, XvMCSubpicture *subpicture)
 {
-   XvMCSubPicturePrivate *subpicture_priv;
+   XvMCSubpicturePrivate *subpicture_priv;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Destroying subpicture %p.\n", subpicture);
 
@@ -193,6 +246,11 @@ Status XvMCBlendSubpicture(Display *dpy, XvMCSurface *target_surface, XvMCSubpic
                            short subx, short suby, unsigned short subw, unsigned short subh,
                            short surfx, short surfy, unsigned short surfw, unsigned short surfh)
 {
+   XvMCSurfacePrivate *surface_priv;
+   XvMCSubpicturePrivate *subpicture_priv;
+
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Associating subpicture %p with surface %p.\n", subpicture, target_surface);
+
    assert(dpy);
 
    if (!target_surface)
@@ -204,7 +262,24 @@ Status XvMCBlendSubpicture(Display *dpy, XvMCSurface *target_surface, XvMCSubpic
    if (target_surface->context_id != subpicture->context_id)
       return BadMatch;
 
+   /* TODO: Verify against subpicture independent scaling */
+
+   surface_priv = target_surface->privData;
+   subpicture_priv = subpicture->privData;
+
    /* TODO: Assert rects are within bounds? Or clip? */
+
+   surface_priv->subpicture = subpicture;
+   surface_priv->subx = subx;
+   surface_priv->suby = suby;
+   surface_priv->subw = subw;
+   surface_priv->subh = subh;
+   surface_priv->surfx = surfx;
+   surface_priv->surfy = surfy;
+   surface_priv->surfw = surfw;
+   surface_priv->surfh = surfh;
+   subpicture_priv->surface = target_surface;
+
    return Success;
 }
 
@@ -227,6 +302,7 @@ Status XvMCBlendSubpicture2(Display *dpy, XvMCSurface *source_surface, XvMCSurfa
       return BadMatch;
 
    /* TODO: Assert rects are within bounds? Or clip? */
+
    return Success;
 }
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 79dae3fb8b7..4d6c5246b5e 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -342,6 +342,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    struct pipe_video_context *vpipe;
    XvMCSurfacePrivate *surface_priv;
    XvMCContextPrivate *context_priv;
+   XvMCSubpicturePrivate *subpicture_priv;
    XvMCContext *context;
    struct pipe_video_rect src_rect = {srcx, srcy, srcw, srch};
    struct pipe_video_rect dst_rect = {destx, desty, destw, desth};
@@ -374,14 +375,34 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    surface_priv = surface->privData;
    context = surface_priv->context;
    context_priv = context->privData;
+   subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;
    vpipe = context_priv->vctx->vpipe;
 
    if (!CreateOrResizeBackBuffer(context_priv->vctx, width, height, &context_priv->backbuffer))
       return BadAlloc;
 
+   if (subpicture_priv) {
+      struct pipe_video_rect sub_src_rect = {surface_priv->subx, surface_priv->suby, surface_priv->subw, surface_priv->subh};
+      struct pipe_video_rect sub_dst_rect = {surface_priv->surfx, surface_priv->surfy, surface_priv->surfw, surface_priv->surfh};
+      struct pipe_video_rect *src_rects[1] = {&sub_src_rect};
+      struct pipe_video_rect *dst_rects[1] = {&sub_dst_rect};
+
+      XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p has subpicture %p.\n", surface, surface_priv->subpicture);
+      /* Pass the arrays themselves: they decay to the pointer-to-pointer type the hook takes. */
+      assert(subpicture_priv->surface == surface);
+      vpipe->set_picture_layers(vpipe, &subpicture_priv->sfc->texture, src_rects, dst_rects, 1);
+      /* The association is consumed by this put: clear both back-references. */
+      surface_priv->subpicture = NULL;
+      subpicture_priv->surface = NULL;
+   }
+   else
+      vpipe->set_picture_layers(vpipe, NULL, NULL, NULL, 0);
+
    vpipe->render_picture(vpipe, surface_priv->pipe_vsfc, PictureToPipe(flags), &src_rect,
                          context_priv->backbuffer, &dst_rect, surface_priv->disp_fence);
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
+
    vl_video_bind_drawable(context_priv->vctx, drawable);
 
    vpipe->screen->flush_frontbuffer
@@ -391,7 +412,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
       vpipe->priv
    );
 
-   XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display.\n", surface);
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Pushed surface %p to front buffer.\n", surface);
 
    return Success;
 }
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 96fe7a9f5e3..df2a0dcc6f3 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #ifndef xvmc_private_h
@@ -41,28 +41,39 @@ struct pipe_fence_handle;
 
 typedef struct
 {
-	struct vl_context *vctx;
-	struct pipe_surface *backbuffer;
+   struct vl_context *vctx;
+   struct pipe_surface *backbuffer;
 } XvMCContextPrivate;
 
 typedef struct
 {
-	struct pipe_video_surface *pipe_vsfc;
-	struct pipe_fence_handle *render_fence;
-	struct pipe_fence_handle *disp_fence;
-	
-	/* Some XvMC functions take a surface but not a context,
-	   so we keep track of which context each surface belongs to. */
-	XvMCContext *context;
+   struct pipe_video_surface *pipe_vsfc;
+   struct pipe_fence_handle *render_fence;
+   struct pipe_fence_handle *disp_fence;
+
+   /* The subpicture associated with this surface, if any. */
+   XvMCSubpicture *subpicture;
+   short subx, suby;
+   unsigned short subw, subh;
+   short surfx, surfy;
+   unsigned short surfw, surfh;
+
+   /* Some XvMC functions take a surface but not a context,
+      so we keep track of which context each surface belongs to. */
+   XvMCContext *context;
 } XvMCSurfacePrivate;
 
 typedef struct
 {
-	struct pipe_surface *sfc;
-	/* Some XvMC functions take a subpicture but not a context,
-	   so we keep track of which context each subpicture belongs to. */
-	XvMCContext *context;
-} XvMCSubPicturePrivate;
+   struct pipe_surface *sfc;
+
+   /* The surface this subpicture is currently associated with, if any. */
+   XvMCSurface *surface;
+
+   /* Some XvMC functions take a subpicture but not a context,
+      so we keep track of which context each subpicture belongs to. */
+   XvMCContext *context;
+} XvMCSubpicturePrivate;
 
 #define XVMC_OUT   0
 #define XVMC_ERR   1
-- 
cgit v1.2.3


From 81badd502932b001e12464c28ba2a52c46fb643a Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Fri, 12 Mar 2010 10:43:29 -0500
Subject: gallium: Add common video format enums.

---
 src/gallium/auxiliary/util/u_format.csv |  7 +++++++
 src/gallium/include/pipe/p_format.h     | 25 ++++++++++++-------------
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index 96a0fa65507..9a7e4b9cb07 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -184,3 +184,10 @@ PIPE_FORMAT_R8_SSCALED            , plain, 1, 1, s8  ,     ,     ,     , x001, r
 PIPE_FORMAT_R8G8_SSCALED          , plain, 1, 1, s8  , s8  ,     ,     , xy01, rgb
 PIPE_FORMAT_R8G8B8_SSCALED        , plain, 1, 1, s8  , s8  , s8  ,     , xyz1, rgb
 PIPE_FORMAT_R8G8B8A8_SSCALED      , plain, 1, 1, s8  , s8  , s8  , s8  , xyzw, rgb
+PIPE_FORMAT_YV12                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_YV16                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_IYUV                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_NV12                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_NV21                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_IA44                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
+PIPE_FORMAT_AI44                  , subsampled, 1, 1, x8  , x8  , x8  , x8  , xyzw, yuv
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index cbf3273ec8d..f3534685b5a 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -58,8 +58,8 @@ enum pipe_format {
    PIPE_FORMAT_I8_UNORM              = 11,   /**< ubyte intensity */
    PIPE_FORMAT_L8A8_UNORM            = 12,   /**< ubyte alpha, luminance */
    PIPE_FORMAT_L16_UNORM             = 13,   /**< ushort luminance */
-   PIPE_FORMAT_UYVY                  = 14,
-   PIPE_FORMAT_YUYV                  = 15,
+   PIPE_FORMAT_UYVY                  = 14,   /**< aka Y422, UYNV, HDYC */
+   PIPE_FORMAT_YUYV                  = 15,   /**< aka YUY2, YUNV, V422 */
    PIPE_FORMAT_Z16_UNORM             = 16,
    PIPE_FORMAT_Z32_UNORM             = 17,
    PIPE_FORMAT_Z32_FLOAT             = 18,
@@ -158,6 +158,16 @@ enum pipe_format {
 
    PIPE_FORMAT_A8B8G8R8_UNORM        = 110,
 
+   PIPE_FORMAT_YV12                  = 111,
+   PIPE_FORMAT_YV16                  = 112,
+   PIPE_FORMAT_IYUV                  = 113,  /**< aka I420 */
+   PIPE_FORMAT_NV12                  = 114,
+   PIPE_FORMAT_NV21                  = 115,
+   PIPE_FORMAT_AYUV                  = PIPE_FORMAT_A8R8G8B8_UNORM,
+   PIPE_FORMAT_VUYA                  = PIPE_FORMAT_B8G8R8A8_UNORM,
+   PIPE_FORMAT_IA44                  = 116,
+   PIPE_FORMAT_AI44                  = 117,
+
    PIPE_FORMAT_COUNT
 };
 
@@ -169,17 +179,6 @@ enum pipe_video_chroma_format
    PIPE_VIDEO_CHROMA_FORMAT_444
 };
 
-#if 0
-enum pipe_video_surface_format
-{
-   PIPE_VIDEO_SURFACE_FORMAT_NV12,  /**< Planar; Y plane, UV plane */
-   PIPE_VIDEO_SURFACE_FORMAT_YV12,  /**< Planar; Y plane, U plane, V plane */
-   PIPE_VIDEO_SURFACE_FORMAT_YUYV,  /**< Interleaved; Y,U,Y,V,Y,U,Y,V */
-   PIPE_VIDEO_SURFACE_FORMAT_UYVY,  /**< Interleaved; U,Y,V,Y,U,Y,V,Y */
-   PIPE_VIDEO_SURFACE_FORMAT_VUYA   /**< Packed; A31-24|Y23-16|U15-8|V7-0 */
-};
-#endif
-
 #ifdef __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From 299407aaa3d3a4ab96097e110df1a7db16eaee9c Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Fri, 12 Mar 2010 12:09:44 -0500
Subject: vl: Get rid of pipe_video_surface.

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h   |  1 -
 src/gallium/drivers/identity/id_objects.c          | 39 ---------------
 src/gallium/drivers/identity/id_objects.h          | 26 ----------
 src/gallium/drivers/identity/id_screen.c           | 33 -------------
 src/gallium/drivers/softpipe/sp_state_sampler.c    |  1 +
 src/gallium/drivers/softpipe/sp_texture.c          | 55 ----------------------
 src/gallium/drivers/softpipe/sp_texture.h          | 16 -------
 src/gallium/drivers/softpipe/sp_video_context.c    | 24 +++++-----
 src/gallium/drivers/softpipe/sp_video_context.h    |  3 +-
 src/gallium/include/pipe/p_video_context.h         | 19 +++-----
 src/gallium/include/pipe/p_video_state.h           | 20 --------
 src/gallium/state_trackers/xorg/xvmc/surface.c     | 31 +++++++++---
 .../state_trackers/xorg/xvmc/xvmc_private.h        |  2 +-
 13 files changed, 46 insertions(+), 224 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index f24edfcf194..532f346a17a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -34,7 +34,6 @@
 #include "vl_types.h"
 
 struct pipe_context;
-struct pipe_video_surface;
 struct pipe_macroblock;
 
 /* A slice is video-width (rounded up to a multiple of macroblock width) x macroblock height */
diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c
index 2b1a60c1bf1..c65c6bf0056 100644
--- a/src/gallium/drivers/identity/id_objects.c
+++ b/src/gallium/drivers/identity/id_objects.c
@@ -180,42 +180,3 @@ identity_transfer_destroy(struct identity_transfer *id_transfer)
    screen->tex_transfer_destroy(id_transfer->transfer);
    FREE(id_transfer);
 }
-
-struct pipe_video_surface *
-identity_video_surface_create(struct identity_screen *id_screen,
-                              struct pipe_video_surface *video_surface)
-{
-   struct identity_video_surface *id_video_surface;
-
-   if (!video_surface) {
-      goto error;
-   }
-
-   assert(video_surface->screen == id_screen->screen);
-
-   id_video_surface = CALLOC_STRUCT(identity_video_surface);
-   if (!id_video_surface) {
-      goto error;
-   }
-
-   memcpy(&id_video_surface->base,
-          video_surface,
-          sizeof(struct pipe_video_surface));
-
-   pipe_reference_init(&id_video_surface->base.reference, 1);
-   id_video_surface->base.screen = &id_screen->base;
-   id_video_surface->video_surface = video_surface;
-
-   return &id_video_surface->base;
-
-error:
-   pipe_video_surface_reference(&video_surface, NULL);
-   return NULL;
-}
-
-void
-identity_video_surface_destroy(struct identity_video_surface *id_video_surface)
-{
-   pipe_video_surface_reference(&id_video_surface->video_surface, NULL);
-   FREE(id_video_surface);
-}
diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h
index 77cc7190798..e89cb2e78c4 100644
--- a/src/gallium/drivers/identity/id_objects.h
+++ b/src/gallium/drivers/identity/id_objects.h
@@ -31,7 +31,6 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_state.h"
-#include "pipe/p_video_state.h"
 
 #include "id_screen.h"
 
@@ -68,14 +67,6 @@ struct identity_transfer
 };
 
 
-struct identity_video_surface
-{
-   struct pipe_video_surface base;
-
-   struct pipe_video_surface *video_surface;
-};
-
-
 static INLINE struct identity_buffer *
 identity_buffer(struct pipe_buffer *_buffer)
 {
@@ -112,16 +103,6 @@ identity_transfer(struct pipe_transfer *_transfer)
    return (struct identity_transfer *)_transfer;
 }
 
-static INLINE struct identity_video_surface *
-identity_video_surface(struct pipe_video_surface *_video_surface)
-{
-   if (!_video_surface) {
-      return NULL;
-   }
-   (void)identity_screen(_video_surface->screen);
-   return (struct identity_video_surface *)_video_surface;
-}
-
 static INLINE struct pipe_buffer *
 identity_buffer_unwrap(struct pipe_buffer *_buffer)
 {
@@ -183,12 +164,5 @@ identity_transfer_create(struct identity_texture *id_texture,
 void
 identity_transfer_destroy(struct identity_transfer *id_transfer);
 
-struct pipe_video_surface *
-identity_video_surface_create(struct identity_screen *id_screen,
-                              struct pipe_video_surface *video_surface);
-
-void
-identity_video_surface_destroy(struct identity_video_surface *id_video_surface);
-
 
 #endif /* ID_OBJECTS_H */
diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c
index b85492114a3..480ea802d04 100644
--- a/src/gallium/drivers/identity/id_screen.c
+++ b/src/gallium/drivers/identity/id_screen.c
@@ -394,33 +394,6 @@ identity_screen_buffer_destroy(struct pipe_buffer *_buffer)
    identity_buffer_destroy(identity_buffer(_buffer));
 }
 
-static struct pipe_video_surface *
-identity_screen_video_surface_create(struct pipe_screen *_screen,
-                                     enum pipe_video_chroma_format chroma_format,
-                                     unsigned width,
-                                     unsigned height)
-{
-   struct identity_screen *id_screen = identity_screen(_screen);
-   struct pipe_screen *screen = id_screen->screen;
-   struct pipe_video_surface *result;
-
-   result = screen->video_surface_create(screen,
-                                         chroma_format,
-                                         width,
-                                         height);
-
-   if (result) {
-      return identity_video_surface_create(id_screen, result);
-   }
-   return NULL;
-}
-
-static void
-identity_screen_video_surface_destroy(struct pipe_video_surface *_vsfc)
-{
-   identity_video_surface_destroy(identity_video_surface(_vsfc));
-}
-
 static void
 identity_screen_flush_frontbuffer(struct pipe_screen *_screen,
                                   struct pipe_surface *_surface,
@@ -515,12 +488,6 @@ identity_screen_create(struct pipe_screen *screen)
    if (screen->buffer_unmap)
       id_screen->base.buffer_unmap = identity_screen_buffer_unmap;
    id_screen->base.buffer_destroy = identity_screen_buffer_destroy;
-   if (screen->video_surface_create) {
-      id_screen->base.video_surface_create = identity_screen_video_surface_create;
-   }
-   if (screen->video_surface_destroy) {
-      id_screen->base.video_surface_destroy = identity_screen_video_surface_destroy;
-   }
    id_screen->base.flush_frontbuffer = identity_screen_flush_frontbuffer;
    id_screen->base.fence_reference = identity_screen_fence_reference;
    id_screen->base.fence_signalled = identity_screen_fence_signalled;
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index ceb4e338f1a..9298023bbe2 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -30,6 +30,7 @@
  */
 
 #include "util/u_memory.h"
+#include "util/u_inlines.h"
 
 #include "draw/draw_context.h"
 #include "draw/draw_context.h"
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 32d261b5ffc..d80a71aca50 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -402,58 +402,6 @@ softpipe_transfer_unmap(struct pipe_screen *screen,
 }
 
 
-static struct pipe_video_surface*
-softpipe_video_surface_create(struct pipe_screen *screen,
-                              enum pipe_video_chroma_format chroma_format,
-                              unsigned width, unsigned height)
-{
-   struct softpipe_video_surface *sp_vsfc;
-   struct pipe_texture template;
-
-   assert(screen);
-   assert(width && height);
-
-   sp_vsfc = CALLOC_STRUCT(softpipe_video_surface);
-   if (!sp_vsfc)
-      return NULL;
-
-   pipe_reference_init(&sp_vsfc->base.reference, 1);
-   sp_vsfc->base.screen = screen;
-   sp_vsfc->base.chroma_format = chroma_format;
-   /*sp_vsfc->base.surface_format = PIPE_VIDEO_SURFACE_FORMAT_VUYA;*/
-   sp_vsfc->base.width = width;
-   sp_vsfc->base.height = height;
-
-   memset(&template, 0, sizeof(struct pipe_texture));
-   template.target = PIPE_TEXTURE_2D;
-   template.format = PIPE_FORMAT_B8G8R8X8_UNORM;
-   template.last_level = 0;
-   /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */
-   template.width0 = util_next_power_of_two(width);
-   template.height0 = util_next_power_of_two(height);
-   template.depth0 = 1;
-   template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
-
-   sp_vsfc->tex = screen->texture_create(screen, &template);
-   if (!sp_vsfc->tex) {
-      FREE(sp_vsfc);
-      return NULL;
-   }
-
-   return &sp_vsfc->base;
-}
-
-
-static void
-softpipe_video_surface_destroy(struct pipe_video_surface *vsfc)
-{
-   struct softpipe_video_surface *sp_vsfc = softpipe_video_surface(vsfc);
-
-   pipe_texture_reference(&sp_vsfc->tex, NULL);
-   FREE(sp_vsfc);
-}
-
-
 void
 softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
 {
@@ -468,9 +416,6 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen)
    screen->tex_transfer_destroy = softpipe_tex_transfer_destroy;
    screen->transfer_map = softpipe_transfer_map;
    screen->transfer_unmap = softpipe_transfer_unmap;
-
-   screen->video_surface_create = softpipe_video_surface_create;
-   screen->video_surface_destroy = softpipe_video_surface_destroy;
 }
 
 
diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h
index 2ef64e1e7c3..2537ab6a40d 100644
--- a/src/gallium/drivers/softpipe/sp_texture.h
+++ b/src/gallium/drivers/softpipe/sp_texture.h
@@ -30,7 +30,6 @@
 
 
 #include "pipe/p_state.h"
-#include "pipe/p_video_state.h"
 
 
 struct pipe_context;
@@ -63,15 +62,6 @@ struct softpipe_transfer
    unsigned long offset;
 };
 
-struct softpipe_video_surface
-{
-   struct pipe_video_surface base;
-
-   /* The data is held here:
-    */
-   struct pipe_texture *tex;
-};
-
 
 /** cast wrappers */
 static INLINE struct softpipe_texture *
@@ -86,12 +76,6 @@ softpipe_transfer(struct pipe_transfer *pt)
    return (struct softpipe_transfer *) pt;
 }
 
-static INLINE struct softpipe_video_surface *
-softpipe_video_surface(struct pipe_video_surface *pvs)
-{
-   return (struct softpipe_video_surface *) pvs;
-}
-
 
 extern void
 softpipe_init_screen_texture_funcs(struct pipe_screen *screen);
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index e575e238bd7..2be093f505d 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -51,7 +51,7 @@ sp_mpeg12_destroy(struct pipe_video_context *vpipe)
    ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast);
    ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
 
-   pipe_video_surface_reference(&ctx->decode_target, NULL);
+   pipe_surface_reference(&ctx->decode_target, NULL);
    vl_compositor_cleanup(&ctx->compositor);
    vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
    ctx->pipe->destroy(ctx->pipe);
@@ -61,8 +61,8 @@ sp_mpeg12_destroy(struct pipe_video_context *vpipe)
 
 static void
 sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
-                             struct pipe_video_surface *past,
-                             struct pipe_video_surface *future,
+                             struct pipe_surface *past,
+                             struct pipe_surface *future,
                              unsigned num_macroblocks,
                              struct pipe_macroblock *macroblocks,
                              struct pipe_fence_handle **fence)
@@ -77,9 +77,9 @@ sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
    assert(ctx->decode_target);
 
    vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer,
-                                            softpipe_video_surface(ctx->decode_target)->tex,
-                                            past ? softpipe_video_surface(past)->tex : NULL,
-                                            future ? softpipe_video_surface(future)->tex : NULL,
+                                            ctx->decode_target->texture,
+                                            past ? past->texture : NULL,
+                                            future ? future->texture : NULL,
                                             num_macroblocks, mpeg12_macroblocks, fence);
 }
 
@@ -122,12 +122,12 @@ sp_mpeg12_surface_copy(struct pipe_video_context *vpipe,
 
 static void
 sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
-                         struct pipe_video_surface     *src_surface,
+                         struct pipe_surface           *src_surface,
                          enum pipe_mpeg12_picture_type picture_type,
                          /*unsigned                    num_past_surfaces,
-                         struct pipe_video_surface     *past_surfaces,
+                         struct pipe_surface           *past_surfaces,
                          unsigned                      num_future_surfaces,
-                         struct pipe_video_surface     *future_surfaces,*/
+                         struct pipe_surface           *future_surfaces,*/
                          struct pipe_video_rect        *src_area,
                          struct pipe_surface           *dst_surface,
                          struct pipe_video_rect        *dst_area,
@@ -141,7 +141,7 @@ sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
    assert(dst_surface);
    assert(dst_area);
 
-   vl_compositor_render(&ctx->compositor, softpipe_video_surface(src_surface)->tex,
+   vl_compositor_render(&ctx->compositor, src_surface->texture,
                         picture_type, src_area, dst_surface->texture, dst_area, fence);
 }
 
@@ -177,14 +177,14 @@ sp_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
 
 static void
 sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
-                            struct pipe_video_surface *dt)
+                            struct pipe_surface *dt)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
 
    assert(vpipe);
    assert(dt);
 
-   pipe_video_surface_reference(&ctx->decode_target, dt);
+   pipe_surface_reference(&ctx->decode_target, dt);
 }
 
 static void
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
index 40743ac423c..bc5daa05ac6 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.h
+++ b/src/gallium/drivers/softpipe/sp_video_context.h
@@ -34,13 +34,12 @@
 
 struct pipe_screen;
 struct pipe_context;
-struct pipe_video_surface;
 
 struct sp_mpeg12_context
 {
    struct pipe_video_context base;
    struct pipe_context *pipe;
-   struct pipe_video_surface *decode_target;
+   struct pipe_surface *decode_target;
    struct vl_mpeg12_mc_renderer mc_renderer;
    struct vl_compositor compositor;
 
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index b3346b219f8..83d214cc53d 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -37,7 +37,6 @@ extern "C" {
 struct pipe_screen;
 struct pipe_buffer;
 struct pipe_surface;
-struct pipe_video_surface;
 struct pipe_macroblock;
 struct pipe_picture_desc;
 struct pipe_fence_handle;
@@ -66,28 +65,22 @@ struct pipe_video_context
                             struct pipe_buffer **bitstream_buf);
 
    void (*decode_macroblocks)(struct pipe_video_context *vpipe,
-                              struct pipe_video_surface *past,
-                              struct pipe_video_surface *future,
+                              struct pipe_surface *past,
+                              struct pipe_surface *future,
                               unsigned num_macroblocks,
                               struct pipe_macroblock *macroblocks,
                               struct pipe_fence_handle **fence);
 
    void (*render_picture)(struct pipe_video_context     *vpipe,
-                          /*struct pipe_surface         *backround,
-                          struct pipe_video_rect        *backround_area,*/
-                          struct pipe_video_surface     *src_surface,
+                          struct pipe_surface           *src_surface,
                           enum pipe_mpeg12_picture_type picture_type,
                           /*unsigned                    num_past_surfaces,
-                          struct pipe_video_surface     *past_surfaces,
+                          struct pipe_surface           *past_surfaces,
                           unsigned                      num_future_surfaces,
-                          struct pipe_video_surface     *future_surfaces,*/
+                          struct pipe_surface           *future_surfaces,*/
                           struct pipe_video_rect        *src_area,
                           struct pipe_surface           *dst_surface,
                           struct pipe_video_rect        *dst_area,
-                          /*unsigned                      num_layers,
-                          struct pipe_texture           *layers,
-                          struct pipe_video_rect        *layer_src_areas,
-                          struct pipe_video_rect        *layer_dst_areas,*/
                           struct pipe_fence_handle      **fence);
 
    void (*surface_fill)(struct pipe_video_context *vpipe,
@@ -123,7 +116,7 @@ struct pipe_video_context
                             const struct pipe_picture_desc *desc);
 
    void (*set_decode_target)(struct pipe_video_context *vpipe,
-                             struct pipe_video_surface *dt);
+                             struct pipe_surface *dt);
 
    void (*set_csc_matrix)(struct pipe_video_context *vpipe, const float *mat);
 
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 96bab56741a..79ce174701b 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -38,26 +38,6 @@
 extern "C" {
 #endif
 
-struct pipe_video_surface
-{
-   struct pipe_reference reference;
-   struct pipe_screen *screen;
-   enum pipe_video_chroma_format chroma_format;
-   /*enum pipe_video_surface_format surface_format;*/
-   unsigned width;
-   unsigned height;
-};
-
-static INLINE void
-pipe_video_surface_reference(struct pipe_video_surface **ptr, struct pipe_video_surface *surf)
-{
-   struct pipe_video_surface *old_surf = *ptr;
-
-   if (pipe_reference(&(*ptr)->reference, &surf->reference))
-      old_surf->screen->video_surface_destroy(old_surf);
-   *ptr = surf;
-}
-
 struct pipe_video_rect
 {
    unsigned x, y, w, h;
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index db630c9e790..a2d71b5c14e 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -33,6 +33,7 @@
 #include <pipe/p_state.h>
 #include <util/u_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include "xvmc_private.h"
 
 static enum pipe_mpeg12_macroblock_type TypeToPipe(int xvmc_mb_type)
@@ -190,7 +191,9 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    XvMCContextPrivate *context_priv;
    struct pipe_video_context *vpipe;
    XvMCSurfacePrivate *surface_priv;
-   struct pipe_video_surface *vsfc;
+   struct pipe_texture template;
+   struct pipe_texture *vsfc_tex;
+   struct pipe_surface *vsfc;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Creating surface %p.\n", surface);
 
@@ -208,9 +211,25 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    if (!surface_priv)
       return BadAlloc;
 
-   assert(vpipe->screen->video_surface_create);
-   vsfc = vpipe->screen->video_surface_create(vpipe->screen, vpipe->chroma_format,
-                                              vpipe->width, vpipe->height);
+   memset(&template, 0, sizeof(struct pipe_texture));
+   template.target = PIPE_TEXTURE_2D;
+   /* XXX: Let the pipe_video_context choose whatever format it likes to render to */
+   template.format = PIPE_FORMAT_AYUV;
+   template.last_level = 0;
+   /* XXX: vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true, clean this up */
+   template.width0 = util_next_power_of_two(context->width);
+   template.height0 = util_next_power_of_two(context->height);
+   template.depth0 = 1;
+   template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+   vsfc_tex = vpipe->screen->texture_create(vpipe->screen, &template);
+   if (!vsfc_tex) {
+      FREE(surface_priv);
+      return BadAlloc;
+   }
+
+   vsfc = vpipe->screen->get_tex_surface(vpipe->screen, vsfc_tex, 0, 0, 0,
+                                         PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+   pipe_texture_reference(&vsfc_tex, NULL);
    if (!vsfc) {
       FREE(surface_priv);
       return BadAlloc;
@@ -390,7 +409,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
       XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p has subpicture %p.\n", surface, surface_priv->subpicture);
 
       assert(subpicture_priv->surface == surface);
-      vpipe->set_picture_layers(vpipe, &subpicture_priv->sfc->texture, &src_rects, &dst_rects, 1);
+      vpipe->set_picture_layers(vpipe, &subpicture_priv->sfc, &src_rects, &dst_rects, 1);
 
       surface_priv->subpicture = NULL;
       subpicture_priv->surface = NULL;
@@ -443,7 +462,7 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
       return XvMCBadSurface;
 
    surface_priv = surface->privData;
-   pipe_video_surface_reference(&surface_priv->pipe_vsfc, NULL);
+   pipe_surface_reference(&surface_priv->pipe_vsfc, NULL);
    FREE(surface_priv);
    surface->privData = NULL;
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index df2a0dcc6f3..82f6fcad712 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -47,7 +47,7 @@ typedef struct
 
 typedef struct
 {
-   struct pipe_video_surface *pipe_vsfc;
+   struct pipe_surface *pipe_vsfc;
    struct pipe_fence_handle *render_fence;
    struct pipe_fence_handle *disp_fence;
 
-- 
cgit v1.2.3


From 69c3ad3fc174c4aaa7cb48cf693ebb4a4e130f4f Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Fri, 12 Mar 2010 12:37:19 -0500
Subject: vl: Use pipe_surface instead of pipe_texture in interfaces.

---
 src/gallium/auxiliary/vl/vl_compositor.c         | 57 +++++++++++-------------
 src/gallium/auxiliary/vl/vl_compositor.h         | 17 ++++---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 37 +++++++--------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  8 ++--
 src/gallium/drivers/softpipe/sp_video_context.c  | 15 +++----
 src/gallium/include/pipe/p_video_context.h       |  4 +-
 6 files changed, 62 insertions(+), 76 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index bafe232877e..b11f45fe5f5 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -293,14 +293,14 @@ void vl_compositor_cleanup(struct vl_compositor *compositor)
 }
 
 void vl_compositor_set_background(struct vl_compositor *compositor,
-                                 struct pipe_texture *bg, struct pipe_video_rect *bg_src_rect)
+                                 struct pipe_surface *bg, struct pipe_video_rect *bg_src_rect)
 {
    assert(compositor);
    assert((bg && bg_src_rect) || (!bg && !bg_src_rect));
 
    if (compositor->bg != bg ||
        !u_video_rects_equal(&compositor->bg_src_rect, bg_src_rect)) {
-      pipe_texture_reference(&compositor->bg, bg);
+      pipe_surface_reference(&compositor->bg, bg);
       /*if (!u_video_rects_equal(&compositor->bg_src_rect, bg_src_rect))*/
          compositor->bg_src_rect = *bg_src_rect;
       compositor->dirty_bg = true;
@@ -308,7 +308,7 @@ void vl_compositor_set_background(struct vl_compositor *compositor,
 }
 
 void vl_compositor_set_layers(struct vl_compositor *compositor,
-                              struct pipe_texture *layers[],
+                              struct pipe_surface *layers[],
                               struct pipe_video_rect *src_rects[],
                               struct pipe_video_rect *dst_rects[],
                               unsigned num_layers)
@@ -327,7 +327,7 @@ void vl_compositor_set_layers(struct vl_compositor *compositor,
           !u_video_rects_equal(&compositor->layer_src_rects[i], src_rects[i]) ||
           !u_video_rects_equal(&compositor->layer_dst_rects[i], dst_rects[i]))
       {
-         pipe_texture_reference(&compositor->layers[i], layers[i]);
+         pipe_surface_reference(&compositor->layers[i], layers[i]);
          /*if (!u_video_rects_equal(&compositor->layer_src_rects[i], src_rects[i]))*/
             compositor->layer_src_rects[i] = *src_rects[i];
          /*if (!u_video_rects_equal(&compositor->layer_dst_rects[i], dst_rects[i]))*/
@@ -337,7 +337,7 @@ void vl_compositor_set_layers(struct vl_compositor *compositor,
    }
 
    for (; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
-      pipe_texture_reference(&compositor->layers[i], NULL);
+      pipe_surface_reference(&compositor->layers[i], NULL);
 }
 
 static void gen_rect_verts(unsigned pos,
@@ -385,10 +385,10 @@ static void gen_rect_verts(unsigned pos,
 }
 
 static unsigned gen_data(struct vl_compositor *c,
-                         struct pipe_texture *src_surface,
+                         struct pipe_surface *src_surface,
                          struct pipe_video_rect *src_rect,
                          struct pipe_video_rect *dst_rect,
-                         struct pipe_texture **textures)
+                         struct pipe_surface **textures)
 {
    void *vb;
    unsigned num_rects = 0;
@@ -407,7 +407,7 @@ static unsigned gen_data(struct vl_compositor *c,
       return 0;
 
    if (c->dirty_bg) {
-      struct vertex2f bg_inv_size = {1.0f / c->bg->width0, 1.0f / c->bg->height0};
+      struct vertex2f bg_inv_size = {1.0f / c->bg->width, 1.0f / c->bg->height};
       gen_rect_verts(num_rects, &c->bg_src_rect, &bg_inv_size, NULL, NULL, vb);
       textures[num_rects] = c->bg;
       ++num_rects;
@@ -415,7 +415,7 @@ static unsigned gen_data(struct vl_compositor *c,
    }
 
    {
-      struct vertex2f src_inv_size = { 1.0f / src_surface->width0, 1.0f / src_surface->height0};
+      struct vertex2f src_inv_size = { 1.0f / src_surface->width, 1.0f / src_surface->height};
       gen_rect_verts(num_rects, src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
       textures[num_rects] = src_surface;
       ++num_rects;
@@ -425,7 +425,7 @@ static unsigned gen_data(struct vl_compositor *c,
       assert(i < VL_COMPOSITOR_MAX_LAYERS);
 
       if (c->dirty_layers & (1 << i)) {
-         struct vertex2f layer_inv_size = {1.0f / c->layers[i]->width0, 1.0f / c->layers[i]->height0};
+         struct vertex2f layer_inv_size = {1.0f / c->layers[i]->width, 1.0f / c->layers[i]->height};
          gen_rect_verts(num_rects, &c->layer_src_rects[i], &layer_inv_size,
                         &c->layer_dst_rects[i], &c->fb_inv_size, vb);
          textures[num_rects] = c->layers[i];
@@ -440,12 +440,12 @@ static unsigned gen_data(struct vl_compositor *c,
 }
 
 static void draw_layers(struct vl_compositor *c,
-                        struct pipe_texture *src_surface,
+                        struct pipe_surface *src_surface,
                         struct pipe_video_rect *src_rect,
                         struct pipe_video_rect *dst_rect)
 {
    unsigned num_rects;
-   struct pipe_texture *textures[VL_COMPOSITOR_MAX_LAYERS + 2];
+   struct pipe_surface *src_surfaces[VL_COMPOSITOR_MAX_LAYERS + 2];
    unsigned i;
 
    assert(c);
@@ -453,23 +453,23 @@ static void draw_layers(struct vl_compositor *c,
    assert(src_rect);
    assert(dst_rect);
 
-   num_rects = gen_data(c, src_surface, src_rect, dst_rect, textures);
+   num_rects = gen_data(c, src_surface, src_rect, dst_rect, src_surfaces);
 
    for (i = 0; i < num_rects; ++i) {
-      c->pipe->set_fragment_sampler_textures(c->pipe, 1, &textures[i]);
+      c->pipe->set_fragment_sampler_textures(c->pipe, 1, &src_surfaces[i]->texture);
       c->pipe->draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
    }
 }
 
 void vl_compositor_render(struct vl_compositor          *compositor,
-                          struct pipe_texture           *src_surface,
+                          struct pipe_surface           *src_surface,
                           enum pipe_mpeg12_picture_type picture_type,
                           /*unsigned                    num_past_surfaces,
-                          struct pipe_texture           *past_surfaces,
+                          struct pipe_surface           *past_surfaces,
                           unsigned                      num_future_surfaces,
-                          struct pipe_texture           *future_surfaces,*/
+                          struct pipe_surface           *future_surfaces,*/
                           struct pipe_video_rect        *src_area,
-                          struct pipe_texture           *dst_surface,
+                          struct pipe_surface           *dst_surface,
                           struct pipe_video_rect        *dst_area,
                           struct pipe_fence_handle      **fence)
 {
@@ -480,21 +480,16 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    assert(dst_area);
    assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
 
-   if (compositor->fb_state.width != dst_surface->width0) {
-      compositor->fb_inv_size.x = 1.0f / dst_surface->width0;
-      compositor->fb_state.width = dst_surface->width0;
+   if (compositor->fb_state.width != dst_surface->width) {
+      compositor->fb_inv_size.x = 1.0f / dst_surface->width;
+      compositor->fb_state.width = dst_surface->width;
    }
-   if (compositor->fb_state.height != dst_surface->height0) {
-      compositor->fb_inv_size.y = 1.0f / dst_surface->height0;
-      compositor->fb_state.height = dst_surface->height0;
+   if (compositor->fb_state.height != dst_surface->height) {
+      compositor->fb_inv_size.y = 1.0f / dst_surface->height;
+      compositor->fb_state.height = dst_surface->height;
    }
 
-   compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface
-   (
-      compositor->pipe->screen,
-      dst_surface,
-      0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ_WRITE
-   );
+   compositor->fb_state.cbufs[0] = dst_surface;
 
    compositor->viewport.scale[0] = compositor->fb_state.width;
    compositor->viewport.scale[1] = compositor->fb_state.height;
@@ -518,8 +513,6 @@ void vl_compositor_render(struct vl_compositor          *compositor,
 
    assert(!compositor->dirty_bg && !compositor->dirty_layers);
    compositor->pipe->flush(compositor->pipe, PIPE_FLUSH_RENDER_CACHE, fence);
-
-   pipe_surface_reference(&compositor->fb_state.cbufs[0], NULL);
 }
 
 void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat)
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index a75223c773f..3b1e809b868 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -34,7 +34,6 @@
 #include "vl_types.h"
 
 struct pipe_context;
-struct pipe_texture;
 
 #define VL_COMPOSITOR_MAX_LAYERS 16
 
@@ -52,10 +51,10 @@ struct vl_compositor
    struct pipe_vertex_element vertex_elems[2];
    struct pipe_buffer *fs_const_buf;
 
-   struct pipe_texture *bg;
+   struct pipe_surface *bg;
    struct pipe_video_rect bg_src_rect;
    bool dirty_bg;
-   struct pipe_texture *layers[VL_COMPOSITOR_MAX_LAYERS];
+   struct pipe_surface *layers[VL_COMPOSITOR_MAX_LAYERS];
    struct pipe_video_rect layer_src_rects[VL_COMPOSITOR_MAX_LAYERS];
    struct pipe_video_rect layer_dst_rects[VL_COMPOSITOR_MAX_LAYERS];
    unsigned dirty_layers;
@@ -66,23 +65,23 @@ bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *p
 void vl_compositor_cleanup(struct vl_compositor *compositor);
 
 void vl_compositor_set_background(struct vl_compositor *compositor,
-                                  struct pipe_texture *bg, struct pipe_video_rect *bg_src_rect);
+                                  struct pipe_surface *bg, struct pipe_video_rect *bg_src_rect);
 
 void vl_compositor_set_layers(struct vl_compositor *compositor,
-                              struct pipe_texture *layers[],
+                              struct pipe_surface *layers[],
                               struct pipe_video_rect *src_rects[],
                               struct pipe_video_rect *dst_rects[],
                               unsigned num_layers);
 
 void vl_compositor_render(struct vl_compositor          *compositor,
-                          struct pipe_texture           *src_surface,
+                          struct pipe_surface           *src_surface,
                           enum pipe_mpeg12_picture_type picture_type,
                           /*unsigned                    num_past_surfaces,
-                          struct pipe_texture           *past_surfaces,
+                          struct pipe_surface           *past_surfaces,
                           unsigned                      num_future_surfaces,
-                          struct pipe_texture           *future_surfaces,*/
+                          struct pipe_surface           *future_surfaces,*/
                           struct pipe_video_rect        *src_area,
-                          struct pipe_texture           *dst_surface,
+                          struct pipe_surface           *dst_surface,
                           struct pipe_video_rect        *dst_area,
                           struct pipe_fence_handle      **fence);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index eca3452a5b4..2e611fca96b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -997,11 +997,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    gen_macroblock_stream(r, num_macroblocks);
 
-   r->fb_state.cbufs[0] = r->pipe->screen->get_tex_surface
-   (
-      r->pipe->screen, r->surface,
-      0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE
-   );
+   r->fb_state.cbufs[0] = r->surface;
 
    r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
    r->pipe->set_viewport_state(r->pipe, &r->viewport);
@@ -1012,8 +1008,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
    );
 
-   vs_consts->denorm.x = r->surface->width0;
-   vs_consts->denorm.y = r->surface->height0;
+   vs_consts->denorm.x = r->surface->width;
+   vs_consts->denorm.y = r->surface->height;
 
    pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf);
 
@@ -1036,7 +1032,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
-      r->textures.individual.ref[0] = r->past;
+      r->textures.individual.ref[0] = r->past->texture;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
@@ -1050,7 +1046,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
-      r->textures.individual.ref[0] = r->past;
+      r->textures.individual.ref[0] = r->past->texture;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
@@ -1064,7 +1060,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
-      r->textures.individual.ref[0] = r->future;
+      r->textures.individual.ref[0] = r->future->texture;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
@@ -1078,7 +1074,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->set_vertex_elements(r->pipe, 6, r->vertex_elems);
-      r->textures.individual.ref[0] = r->future;
+      r->textures.individual.ref[0] = r->future->texture;
       r->pipe->set_fragment_sampler_textures(r->pipe, 4, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
@@ -1092,8 +1088,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
-      r->textures.individual.ref[0] = r->past;
-      r->textures.individual.ref[1] = r->future;
+      r->textures.individual.ref[0] = r->past->texture;
+      r->textures.individual.ref[1] = r->future->texture;
       r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
@@ -1107,8 +1103,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
    if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->set_vertex_elements(r->pipe, 8, r->vertex_elems);
-      r->textures.individual.ref[0] = r->past;
-      r->textures.individual.ref[1] = r->future;
+      r->textures.individual.ref[0] = r->past->texture;
+      r->textures.individual.ref[1] = r->future->texture;
       r->pipe->set_fragment_sampler_textures(r->pipe, 5, r->textures.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
@@ -1120,7 +1116,6 @@ flush(struct vl_mpeg12_mc_renderer *r)
    }
 
    r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
-   pipe_surface_reference(&r->fb_state.cbufs[0], NULL);
 
    if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
       for (i = 0; i < 3; ++i)
@@ -1328,9 +1323,9 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 void
 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
                                          *renderer,
-                                         struct pipe_texture *surface,
-                                         struct pipe_texture *past,
-                                         struct pipe_texture *future,
+                                         struct pipe_surface *surface,
+                                         struct pipe_surface *past,
+                                         struct pipe_surface *future,
                                          unsigned num_macroblocks,
                                          struct pipe_mpeg12_macroblock
                                          *mpeg12_macroblocks,
@@ -1365,8 +1360,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       renderer->past = past;
       renderer->future = future;
       renderer->fence = fence;
-      renderer->surface_tex_inv_size.x = 1.0f / surface->width0;
-      renderer->surface_tex_inv_size.y = 1.0f / surface->height0;
+      renderer->surface_tex_inv_size.x = 1.0f / surface->width;
+      renderer->surface_tex_inv_size.y = 1.0f / surface->height;
    }
 
    while (num_macroblocks) {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 532f346a17a..1a216b5ad49 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -87,7 +87,7 @@ struct vl_mpeg12_mc_renderer
       struct { struct pipe_vertex_buffer ycbcr, ref[2]; } individual;
    } vertex_bufs;
 
-   struct pipe_texture *surface, *past, *future;
+   struct pipe_surface *surface, *past, *future;
    struct pipe_fence_handle **fence;
    unsigned num_macroblocks;
    struct pipe_mpeg12_macroblock *macroblock_buf;
@@ -109,9 +109,9 @@ bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
 void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
-                                              struct pipe_texture *surface,
-                                              struct pipe_texture *past,
-                                              struct pipe_texture *future,
+                                              struct pipe_surface *surface,
+                                              struct pipe_surface *past,
+                                              struct pipe_surface *future,
                                               unsigned num_macroblocks,
                                               struct pipe_mpeg12_macroblock *mpeg12_macroblocks,
                                               struct pipe_fence_handle **fence);
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 2be093f505d..7850908b134 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -77,10 +77,9 @@ sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
    assert(ctx->decode_target);
 
    vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer,
-                                            ctx->decode_target->texture,
-                                            past ? past->texture : NULL,
-                                            future ? future->texture : NULL,
-                                            num_macroblocks, mpeg12_macroblocks, fence);
+                                            ctx->decode_target,
+                                            past, future, num_macroblocks,
+                                            mpeg12_macroblocks, fence);
 }
 
 static void
@@ -141,13 +140,13 @@ sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
    assert(dst_surface);
    assert(dst_area);
 
-   vl_compositor_render(&ctx->compositor, src_surface->texture,
-                        picture_type, src_area, dst_surface->texture, dst_area, fence);
+   vl_compositor_render(&ctx->compositor, src_surface,
+                        picture_type, src_area, dst_surface, dst_area, fence);
 }
 
 static void
 sp_mpeg12_set_picture_background(struct pipe_video_context *vpipe,
-                                  struct pipe_texture *bg,
+                                  struct pipe_surface *bg,
                                   struct pipe_video_rect *bg_src_rect)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
@@ -161,7 +160,7 @@ sp_mpeg12_set_picture_background(struct pipe_video_context *vpipe,
 
 static void
 sp_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
-                             struct pipe_texture *layers[],
+                             struct pipe_surface *layers[],
                              struct pipe_video_rect *src_rects[],
                              struct pipe_video_rect *dst_rects[],
                              unsigned num_layers)
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 83d214cc53d..f9dc625c7d7 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -103,11 +103,11 @@ struct pipe_video_context
     */
    /*@{*/
    void (*set_picture_background)(struct pipe_video_context *vpipe,
-                                  struct pipe_texture *bg,
+                                  struct pipe_surface *bg,
                                   struct pipe_video_rect *bg_src_rect);
 
    void (*set_picture_layers)(struct pipe_video_context *vpipe,
-                              struct pipe_texture *layers[],
+                              struct pipe_surface *layers[],
                               struct pipe_video_rect *src_rects[],
                               struct pipe_video_rect *dst_rects[],
                               unsigned num_layers);
-- 
cgit v1.2.3


From f1bbd41e326ce7a90e9b5956195203eee537cbc7 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Fri, 12 Mar 2010 13:36:52 -0500
Subject: vl: Don't wrap blocks in pipe_user_buffers.

Mallocing and freeing the wrappers eats up a noticeable amount of CPU
time for no practical benefit.
---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 22 ++++++++++++----------
 src/gallium/include/pipe/p_video_state.h         |  2 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c   |  6 +-----
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 2e611fca96b..769ee38c46b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1235,19 +1235,15 @@ static void
 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
                 struct pipe_mpeg12_macroblock *mb)
 {
-   void *blocks;
-
    assert(r);
    assert(mb);
+   assert(mb->blocks);
    assert(r->num_macroblocks < r->macroblocks_per_batch);
 
    memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
           sizeof(struct pipe_mpeg12_macroblock));
 
-   blocks = pipe_buffer_map(r->pipe->screen, mb->blocks,
-                            PIPE_BUFFER_USAGE_CPU_READ);
-   grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, blocks);
-   pipe_buffer_unmap(r->pipe->screen, mb->blocks);
+   grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
 
    ++r->num_macroblocks;
 }
@@ -1318,6 +1314,10 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
    cleanup_pipe_state(renderer);
    cleanup_shaders(renderer);
    cleanup_buffers(renderer);
+
+   pipe_surface_reference(&renderer->surface, NULL);
+   pipe_surface_reference(&renderer->past, NULL);
+   pipe_surface_reference(&renderer->future, NULL);
 }
 
 void
@@ -1356,9 +1356,9 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       new_surface = true;
 
    if (new_surface) {
-      renderer->surface = surface;
-      renderer->past = past;
-      renderer->future = future;
+      pipe_surface_reference(&renderer->surface, surface);
+      pipe_surface_reference(&renderer->past, past);
+      pipe_surface_reference(&renderer->future, future);
       renderer->fence = fence;
       renderer->surface_tex_inv_size.x = 1.0f / surface->width;
       renderer->surface_tex_inv_size.y = 1.0f / surface->height;
@@ -1381,7 +1381,9 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
          flush(renderer);
          xfer_buffers_map(renderer);
          /* Next time we get this surface it may have new ref frames */
-         renderer->surface = NULL;
+         pipe_surface_reference(&renderer->surface, NULL);
+         pipe_surface_reference(&renderer->past, NULL);
+         pipe_surface_reference(&renderer->future, NULL);
       }
    }
 }
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 79ce174701b..5eb96352139 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -90,7 +90,7 @@ struct pipe_mpeg12_macroblock
    enum pipe_mpeg12_dct_type dct_type;
    signed pmv[2][2][2];
    unsigned cbp;
-   struct pipe_buffer *blocks;
+   short *blocks;
 };
 
 #if 0
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index a2d71b5c14e..d2c4e5f19ca 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -178,8 +178,7 @@ MacroBlocksToPipe(struct pipe_screen *screen,
                pipe_macroblocks->pmv[j][k][l] = xvmc_mb->PMV[j][k][l];
 
       pipe_macroblocks->cbp = xvmc_mb->coded_block_pattern;
-      pipe_macroblocks->blocks = pipe_user_buffer_create(screen, xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES,
-                                                         BLOCK_SIZE_BYTES);
+      pipe_macroblocks->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
 
       ++pipe_macroblocks;
       ++xvmc_mb;
@@ -320,9 +319,6 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    vpipe->decode_macroblocks(vpipe, p_vsfc, f_vsfc, num_macroblocks,
                              &pipe_macroblocks->base, target_surface_priv->render_fence);
 
-   for (i = 0; i < num_macroblocks; ++i)
-      vpipe->screen->buffer_destroy(pipe_macroblocks[i].blocks);
-
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface);
 
    return Success;
-- 
cgit v1.2.3


From 035332cbbbc173387b2c03c5f7120a2fdb608625 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Fri, 12 Mar 2010 14:38:37 -0500
Subject: vl: Remove pipe_video_surface hooks from pipe_screen as well.

---
 src/gallium/include/pipe/p_screen.h            | 12 ------------
 src/gallium/state_trackers/xorg/xvmc/surface.c |  3 +--
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index e4a92228093..b8e007ec8ac 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -55,7 +55,6 @@ struct pipe_winsys;
 struct pipe_buffer;
 struct pipe_texture;
 struct pipe_surface;
-struct pipe_video_surface;
 struct pipe_transfer;
 
 
@@ -260,17 +259,6 @@ struct pipe_screen {
 
    void (*buffer_destroy)( struct pipe_buffer *buf );
 
-   /**
-    * Create a video surface suitable for use as a decoding target by the
-    * driver's pipe_video_context.
-    */
-   struct pipe_video_surface*
-   (*video_surface_create)( struct pipe_screen *screen,
-                            enum pipe_video_chroma_format chroma_format,
-                            unsigned width, unsigned height );
-
-   void (*video_surface_destroy)( struct pipe_video_surface *vsfc );
-
    /**
     * Do any special operations to ensure buffer size is correct
     */
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index d2c4e5f19ca..354c257a806 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -127,8 +127,7 @@ CreateOrResizeBackBuffer(struct vl_context *vctx, unsigned int width, unsigned i
       return false;
 
    *backbuffer = vpipe->screen->get_tex_surface(vpipe->screen, tex, 0, 0, 0,
-                                                PIPE_BUFFER_USAGE_GPU_READ |
-                                                PIPE_BUFFER_USAGE_GPU_WRITE);
+                                                PIPE_BUFFER_USAGE_GPU_READ_WRITE);
    pipe_texture_reference(&tex, NULL);
 
    if (!*backbuffer)
-- 
cgit v1.2.3


From 99218cd2b3377cee1d2ec07ae4af1278660f61ae Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Fri, 12 Mar 2010 19:03:37 -0500
Subject: vl: Add some query methods to pipe_video_context and use them.

---
 src/gallium/drivers/softpipe/sp_video_context.c | 44 +++++++++++++++++++++++++
 src/gallium/include/pipe/p_video_context.h      | 18 ++++++++++
 src/gallium/state_trackers/xorg/xvmc/surface.c  | 21 +++++++++---
 3 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 7850908b134..9d75a1e508f 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -59,6 +59,48 @@ sp_mpeg12_destroy(struct pipe_video_context *vpipe)
    FREE(ctx);
 }
 
+static int
+sp_mpeg12_get_param(struct pipe_video_context *vpipe, int param)
+{
+   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+   /* Report the integer value of capability `param`; ctx is only read by the disabled #if 0 query below. */
+   assert(vpipe);
+
+   switch (param) {
+      case PIPE_CAP_NPOT_TEXTURES:
+         /* XXX: Temporary; not all paths are NPOT-tested */
+#if 0
+         return ctx->pipe->screen->get_param(ctx->pipe->screen, param);
+#endif
+         return FALSE; /* NPOT is reported as unsupported while the screen query above is compiled out */
+      case PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT:
+         return PIPE_FORMAT_AYUV; /* enum pipe_format value; the XvMC caller casts it back */
+      default:
+      {
+         debug_printf("Softpipe: Unknown PIPE_CAP %d\n", param);
+         return 0; /* unrecognized capabilities are logged and answered with 0 */
+      }
+   }
+}
+
+static boolean
+sp_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
+                              enum pipe_format format,
+                              unsigned usage,
+                              unsigned geom)
+{
+   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+   /* Tell whether `format` can be used with the given usage and geometry flags. */
+   assert(vpipe);
+
+   /* XXX: Temporary; not all paths are NPOT-tested */
+   if (geom & PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)
+      return FALSE;
+   /* Otherwise defer to the underlying screen, always querying as a 2D texture. */
+   return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, PIPE_TEXTURE_2D,
+                                                 format, usage, geom);
+}
+
 static void
 sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
                              struct pipe_surface *past,
@@ -297,6 +339,8 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
 
    ctx->base.screen = pipe->screen;
    ctx->base.destroy = sp_mpeg12_destroy;
+   ctx->base.get_param = sp_mpeg12_get_param;
+   ctx->base.is_format_supported = sp_mpeg12_is_format_supported;
    ctx->base.decode_macroblocks = sp_mpeg12_decode_macroblocks;
    ctx->base.render_picture = sp_mpeg12_render_picture;
    ctx->base.surface_fill = sp_mpeg12_surface_fill;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index f9dc625c7d7..d90b667de6c 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -34,6 +34,9 @@ extern "C" {
 
 #include <pipe/p_video_state.h>
 
+/* XXX: Move to an appropriate place */
+#define PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT 256
+
 struct pipe_screen;
 struct pipe_buffer;
 struct pipe_surface;
@@ -54,6 +57,21 @@ struct pipe_video_context
 
    void *priv; /**< context private data (for DRI for example) */
 
+   /**
+    * Query an integer-valued capability/parameter/limit
+    * \param param  one of PIPE_CAP_x
+    */
+   int (*get_param)(struct pipe_video_context *vpipe, int param);
+
+   /**
+    * Check if the given pipe_format is supported as a texture or
+    * drawing surface.
+    */
+   boolean (*is_format_supported)(struct pipe_video_context *vpipe,
+                                  enum pipe_format format,
+                                  unsigned usage,
+                                  unsigned geom);
+
    void (*destroy)(struct pipe_video_context *vpipe);
 
    /**
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 354c257a806..998a7af0e95 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -211,12 +211,23 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
 
    memset(&template, 0, sizeof(struct pipe_texture));
    template.target = PIPE_TEXTURE_2D;
-   /* XXX: Let the pipe_video_context choose whatever format it likes to render to */
-   template.format = PIPE_FORMAT_AYUV;
+   template.format = (enum pipe_format)vpipe->get_param(vpipe, PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT);
    template.last_level = 0;
-   /* XXX: vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true, clean this up */
-   template.width0 = util_next_power_of_two(context->width);
-   template.height0 = util_next_power_of_two(context->height);
+   if (vpipe->is_format_supported(vpipe, template.format,
+                                  PIPE_TEXTURE_USAGE_SAMPLER |
+                                  PIPE_TEXTURE_USAGE_RENDER_TARGET,
+                                  PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)) {
+      template.width0 = context->width;
+      template.height0 = context->height;
+   }
+   else {
+      assert(vpipe->is_format_supported(vpipe, template.format,
+                                       PIPE_TEXTURE_USAGE_SAMPLER |
+                                       PIPE_TEXTURE_USAGE_RENDER_TARGET,
+                                       PIPE_TEXTURE_GEOM_NON_SQUARE));
+      template.width0 = util_next_power_of_two(context->width);
+      template.height0 = util_next_power_of_two(context->height);
+   }
    template.depth0 = 1;
    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
    vsfc_tex = vpipe->screen->texture_create(vpipe->screen, &template);
-- 
cgit v1.2.3


From 5eb822cb6a8fb461ee5b1bd881e0ef1b91c432b5 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 18 Apr 2010 12:16:40 -0400
Subject: vl: Do some subpicture validation.

---
 src/gallium/state_trackers/xorg/xvmc/context.c     |  25 +++--
 src/gallium/state_trackers/xorg/xvmc/subpicture.c  | 118 ++++++++++++++++++---
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   2 +
 3 files changed, 122 insertions(+), 23 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index d56d61c79c6..586cc1f45ac 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -40,7 +40,9 @@
 static Status Validate(Display *dpy, XvPortID port, int surface_type_id,
                        unsigned int width, unsigned int height, int flags,
                        bool *found_port, int *screen, int *chroma_format,
-                       int *mc_type, int *surface_flags)
+                       int *mc_type, int *surface_flags,
+                       unsigned short *subpic_max_w,
+                       unsigned short *subpic_max_h)
 {
    bool found_surface = false;
    XvAdaptorInfo *adaptor_info;
@@ -55,6 +57,8 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id,
    assert(chroma_format);
    assert(mc_type);
    assert(surface_flags);
+   assert(subpic_max_w);
+   assert(subpic_max_h);
 
    *found_port = false;
 
@@ -88,16 +92,20 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id,
                *chroma_format = surface_info[l].chroma_format;
                *mc_type = surface_info[l].mc_type;
                *surface_flags = surface_info[l].flags;
+               *subpic_max_w = surface_info[l].subpicture_max_width;
+               *subpic_max_h = surface_info[l].subpicture_max_height;
                *screen = i;
 
-               XVMC_MSG(XVMC_TRACE, "[XvMC] Found suitable context surface format.\n" \
+               XVMC_MSG(XVMC_TRACE, "[XvMC] Found requested context surface format.\n" \
                                     "[XvMC]   screen=%u, port=%u\n" \
-                                    "[XvMC]   id: 0x%08X\n" \
+                                    "[XvMC]   id=0x%08X\n" \
                                     "[XvMC]   max width=%u, max height=%u\n" \
                                     "[XvMC]   chroma format=0x%08X\n" \
                                     "[XvMC]   acceleration level=0x%08X\n" \
-                                    "[XvMC]   flags=0x%08X\n",
-                                    i, port, surface_type_id, max_width, max_height, *chroma_format, *mc_type, *surface_flags);
+                                    "[XvMC]   flags=0x%08X\n" \
+                                    "[XvMC]   subpicture max width=%u, max height=%u\n",
+                                    i, port, surface_type_id, max_width, max_height, *chroma_format,
+                                    *mc_type, *surface_flags, *subpic_max_w, *subpic_max_h);
             }
 
             XFree(surface_info);
@@ -172,6 +180,8 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
    int chroma_format;
    int mc_type;
    int surface_flags;
+   unsigned short subpic_max_w;
+   unsigned short subpic_max_h;
    Status ret;
    struct vl_screen *vscreen;
    struct vl_context *vctx;
@@ -186,7 +196,8 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       return XvMCBadContext;
 
    ret = Validate(dpy, port, surface_type_id, width, height, flags,
-                  &found_port, &scrn, &chroma_format, &mc_type, &surface_flags);
+                  &found_port, &scrn, &chroma_format, &mc_type, &surface_flags,
+                  &subpic_max_w, &subpic_max_h);
 
    /* Success and XvBadPort have the same value */
    if (ret != Success || !found_port)
@@ -239,6 +250,8 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
    vctx->vpipe->set_csc_matrix(vctx->vpipe, csc);
 
    context_priv->vctx = vctx;
+   context_priv->subpicture_max_width = subpic_max_w;
+   context_priv->subpicture_max_height = subpic_max_h;
 
    context->context_id = XAllocID(dpy);
    context->surface_type_id = surface_type_id;
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 4f75c73413b..a6d75f63259 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -28,6 +28,7 @@
 #include <assert.h>
 #include <X11/Xlibint.h>
 #include <X11/extensions/XvMClib.h>
+#include <xorg/fourcc.h>
 #include <vl_winsys.h>
 #include <pipe/p_screen.h>
 #include <pipe/p_video_context.h>
@@ -38,6 +39,93 @@
 
 #define FOURCC_RGB 0x0000003
 
+static enum pipe_format XvIDToPipe(int xvimage_id) /* Xv image ID -> pipe_format of the subpicture texture */
+{
+   switch (xvimage_id) {
+      case FOURCC_RGB:
+         return PIPE_FORMAT_B8G8R8X8_UNORM; /* only image ID with a mapping */
+      default:
+         XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", xvimage_id);
+         return PIPE_FORMAT_NONE; /* unknown IDs are logged and mapped to PIPE_FORMAT_NONE */
+   }
+}
+
+/* Clear component_order and return the byte count the caller stores in entry_bytes. */
+static int PipeToComponentOrder(enum pipe_format format, char *component_order)
+{
+   assert(component_order);
+
+   /* Zero all four entries up front so recognized formats never leave them unset. */
+   component_order[0] = 0;
+   component_order[1] = 0;
+   component_order[2] = 0;
+   component_order[3] = 0;
+
+   if (format != PIPE_FORMAT_B8G8R8X8_UNORM) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized PIPE_FORMAT 0x%08X.\n", format);
+   }
+
+   return 0;
+}
+
+static Status Validate(Display *dpy, XvPortID port, int surface_type_id, int xvimage_id)
+{
+   XvImageFormatValues *subpictures;
+   int num_subpics;
+   unsigned int i;
+   /* Succeeds only if xvimage_id is among the subpicture types listed for this port and surface type. */
+   subpictures = XvMCListSubpictureTypes(dpy, port, surface_type_id, &num_subpics);
+   if (num_subpics < 1) { /* nothing listed: release any returned array and report BadMatch */
+      if (subpictures)
+         XFree(subpictures);
+      return BadMatch;
+   }
+   if (!subpictures) /* a positive count without an array is reported as BadAlloc */
+      return BadAlloc;
+
+   for (i = 0; i < num_subpics; ++i) {
+      if (subpictures[i].id == xvimage_id) {
+         XVMC_MSG(XVMC_TRACE, "[XvMC] Found requested subpicture format.\n" \
+                              "[XvMC]   port=%u\n" \
+                              "[XvMC]   surface id=0x%08X\n" \
+                              "[XvMC]   image id=0x%08X\n" \
+                              "[XvMC]   type=%08X\n" \
+                              "[XvMC]   byte order=%08X\n" \
+                              "[XvMC]   bits per pixel=%u\n" \
+                              "[XvMC]   format=%08X\n" \
+                              "[XvMC]   num planes=%d\n",
+                              port, surface_type_id, xvimage_id, subpictures[i].type, subpictures[i].byte_order,
+                              subpictures[i].bits_per_pixel, subpictures[i].format, subpictures[i].num_planes);
+         if (subpictures[i].type == XvRGB) { /* RGB formats: also trace depth and channel masks */
+            XVMC_MSG(XVMC_TRACE, "[XvMC]   depth=%d\n" \
+                                 "[XvMC]   red mask=0x%08X\n" \
+                                 "[XvMC]   green mask=0x%08X\n" \
+                                 "[XvMC]   blue mask=0x%08X\n",
+                                 subpictures[i].depth, subpictures[i].red_mask, subpictures[i].green_mask, subpictures[i].blue_mask);
+         }
+         else if (subpictures[i].type == XvYUV) { /* YUV formats: also trace sample bits and sampling periods */
+            XVMC_MSG(XVMC_TRACE, "[XvMC]   y sample bits=0x%08X\n" \
+                                 "[XvMC]   u sample bits=0x%08X\n" \
+                                 "[XvMC]   v sample bits=0x%08X\n" \
+                                 "[XvMC]   horz y period=%u\n" \
+                                 "[XvMC]   horz u period=%u\n" \
+                                 "[XvMC]   horz v period=%u\n" \
+                                 "[XvMC]   vert y period=%u\n" \
+                                 "[XvMC]   vert u period=%u\n" \
+                                 "[XvMC]   vert v period=%u\n",
+                                 subpictures[i].y_sample_bits, subpictures[i].u_sample_bits, subpictures[i].v_sample_bits,
+                                 subpictures[i].horz_y_period, subpictures[i].horz_u_period, subpictures[i].horz_v_period,
+                                 subpictures[i].vert_y_period, subpictures[i].vert_u_period, subpictures[i].vert_v_period);
+         }
+         break; /* stop at the first match so i stays below num_subpics */
+      }
+   }
+
+   XFree(subpictures);
+   /* The loop leaves early only on a match, so i < num_subpics means the ID was found. */
+   return i < num_subpics ? Success : BadMatch;
+}
+
 Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *subpicture,
                             unsigned short width, unsigned short height, int xvimage_id)
 {
@@ -46,6 +134,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    struct pipe_video_context *vpipe;
    struct pipe_texture template;
    struct pipe_texture *tex;
+   Status ret;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Creating subpicture %p.\n", subpicture);
 
@@ -60,12 +149,13 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    if (!subpicture)
       return XvMCBadSubpicture;
 
-   /* TODO: Check against surface max width, height */
-   if (width > 2048 || height > 2048)
+   if (width > context_priv->subpicture_max_width ||
+       height > context_priv->subpicture_max_height)
       return BadValue;
 
-   if (xvimage_id != FOURCC_RGB)
-      return BadMatch;
+   ret = Validate(dpy, context->port, context->surface_type_id, xvimage_id);
+   if (ret != Success)
+      return ret;
 
    subpicture_priv = CALLOC(1, sizeof(XvMCSubpicturePrivate));
    if (!subpicture_priv)
@@ -73,9 +163,9 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
 
    memset(&template, 0, sizeof(struct pipe_texture));
    template.target = PIPE_TEXTURE_2D;
-   template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+   template.format = XvIDToPipe(xvimage_id);
    template.last_level = 0;
-   if (vpipe->screen->get_param(vpipe->screen, PIPE_CAP_NPOT_TEXTURES)) {
+   if (vpipe->get_param(vpipe, PIPE_CAP_NPOT_TEXTURES)) {
       template.width0 = width;
       template.height0 = height;
    }
@@ -92,8 +182,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
                                                          PIPE_BUFFER_USAGE_CPU_WRITE |
                                                          PIPE_BUFFER_USAGE_GPU_READ);
    pipe_texture_reference(&tex, NULL);
-   if (!subpicture_priv->sfc)
-   {
+   if (!subpicture_priv->sfc) {
       FREE(subpicture_priv);
       return BadAlloc;
    }
@@ -104,11 +193,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    subpicture->width = width;
    subpicture->height = height;
    subpicture->num_palette_entries = 0;
-   subpicture->entry_bytes = 0;
-   subpicture->component_order[0] = 0;
-   subpicture->component_order[1] = 0;
-   subpicture->component_order[2] = 0;
-   subpicture->component_order[3] = 0;
+   subpicture->entry_bytes = PipeToComponentOrder(template.format, subpicture->component_order);
    subpicture->privData = subpicture_priv;
 
    SyncHandle();
@@ -184,10 +269,9 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
       return BadAlloc;
    }
 
-   switch (image->id)
-   {
+   switch (image->id) {
       case FOURCC_RGB:
-         assert(subpicture_priv->sfc->format == PIPE_FORMAT_X8R8G8B8_UNORM);
+         assert(subpicture_priv->sfc->format == XvIDToPipe(image->id));
          for (y = 0; y < height; ++y) {
             for (x = 0; x < width; ++x, src += 3, dst += 4) {
                /* TODO: Confirm or fix */
@@ -198,7 +282,7 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
          }
          break;
       default:
-         assert(false);
+         XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", image->id);
    }
 
    screen->transfer_unmap(screen, xfer);
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 82f6fcad712..1e2dfb4223a 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -43,6 +43,8 @@ typedef struct
 {
    struct vl_context *vctx;
    struct pipe_surface *backbuffer;
+   unsigned short subpicture_max_width;
+   unsigned short subpicture_max_height;
 } XvMCContextPrivate;
 
 typedef struct
-- 
cgit v1.2.3


From edca5360cab6063a1dc2a388da800ca01eb86a42 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 24 Apr 2010 13:52:23 -0400
Subject: vl: Fix up configure.ac/Makefile

---
 configure.ac                                  | 9 ++++++---
 src/gallium/winsys/g3dvl/drm/nouveau/Makefile | 2 +-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/configure.ac b/configure.ac
index 90b105cff53..f9b5a32782e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1354,7 +1354,6 @@ if test "x$enable_gallium_nouveau" = xyes; then
 fi
 
 dnl
-<<<<<<< HEAD
 dnl Gallium G3DVL configuration
 dnl
 AC_ARG_ENABLE([gallium-g3dvl],
@@ -1367,10 +1366,15 @@ if test "x$enable_gallium_g3dvl" = xyes; then
     for dir in $GALLIUM_WINSYS_DIRS; do
         vl_winsys_dirs="$vl_winsys_dirs g3dvl/$dir"
     done
+    # Hack, g3dvl dri state tracker is in winsys/g3dvl/dri
+    # and needs to be built before the drm bits
+    if test "$mesa_driver" = dri; then
+        vl_winsys_dirs="g3dvl/dri $vl_winsys_dirs"
+    fi
     GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS $vl_winsys_dirs"
 fi
 
-=======
+dnl
 dnl Gallium swrast configuration
 dnl
 AC_ARG_ENABLE([gallium-swrast],
@@ -1384,7 +1388,6 @@ fi
 
 dnl prepend CORE_DIRS to SRC_DIRS
 SRC_DIRS="$CORE_DIRS $SRC_DIRS"
->>>>>>> origin/master
 
 dnl Restore LDFLAGS and CPPFLAGS
 LDFLAGS="$_SAVE_LDFLAGS"
diff --git a/src/gallium/winsys/g3dvl/drm/nouveau/Makefile b/src/gallium/winsys/g3dvl/drm/nouveau/Makefile
index 7ff448421a6..fe41ac9269e 100644
--- a/src/gallium/winsys/g3dvl/drm/nouveau/Makefile
+++ b/src/gallium/winsys/g3dvl/drm/nouveau/Makefile
@@ -7,7 +7,7 @@ include $(TOP)/configs/current
 C_SOURCES =
 
 DRIVER_INCLUDES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-I) \
-                   -I$(TOP)/src/gallium/winsys/drm/nouveau \
+                   -I$(TOP)/src/gallium/winsys/drm/nouveau
 DRIVER_DEFINES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-other)
 
 PIPE_DRIVERS = \
-- 
cgit v1.2.3


From 356473121c67fe626ca1df7d44fadfa601c3fe48 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 24 Apr 2010 13:52:47 -0400
Subject: vl: Get rid of pipe_video_surface on the nouveau side as well.

---
 src/gallium/drivers/nv40/nv40_miptree.c | 53 ---------------------------------
 src/gallium/drivers/nv40/nv40_state.h   | 12 --------
 2 files changed, 65 deletions(-)

diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index 8c74417c9ad..85d7e1f1972 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -223,57 +223,6 @@ nv40_miptree_surface_del(struct pipe_surface *ps)
 	FREE(ps);
 }
 
-static struct pipe_video_surface*
-nv40_video_surface_new(struct pipe_screen *screen,
-                       enum pipe_video_chroma_format chroma_format,
-                       unsigned width, unsigned height)
-{
-   struct nv40_video_surface *nv40_vsfc;
-   struct pipe_texture template;
-
-   assert(screen);
-   assert(width && height);
-
-   nv40_vsfc = CALLOC_STRUCT(nv40_video_surface);
-   if (!nv40_vsfc)
-      return NULL;
-
-   pipe_reference_init(&nv40_vsfc->base.reference, 1);
-   nv40_vsfc->base.screen = screen;
-   nv40_vsfc->base.chroma_format = chroma_format;
-   /*nv40_vsfc->base.surface_format = PIPE_VIDEO_SURFACE_FORMAT_VUYA;*/
-   nv40_vsfc->base.width = width;
-   nv40_vsfc->base.height = height;
-
-   memset(&template, 0, sizeof(struct pipe_texture));
-   template.target = PIPE_TEXTURE_2D;
-   template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
-   template.last_level = 0;
-   /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */
-   template.width0 = util_next_power_of_two(width);
-   template.height0 = util_next_power_of_two(height);
-   template.depth0 = 1;
-   template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
-
-   nv40_vsfc->tex = screen->texture_create(screen, &template);
-   if (!nv40_vsfc->tex) {
-      FREE(nv40_vsfc);
-      return NULL;
-   }
-
-   return &nv40_vsfc->base;
-}
-
-
-static void
-nv40_video_surface_del(struct pipe_video_surface *vsfc)
-{
-   struct nv40_video_surface *nv40_vsfc = nv40_video_surface(vsfc);
-
-   pipe_texture_reference(&nv40_vsfc->tex, NULL);
-   FREE(nv40_vsfc);
-}
-
 void
 nv40_screen_init_miptree_functions(struct pipe_screen *pscreen)
 {
@@ -282,7 +231,5 @@ nv40_screen_init_miptree_functions(struct pipe_screen *pscreen)
 	pscreen->texture_destroy = nv40_miptree_destroy;
 	pscreen->get_tex_surface = nv40_miptree_surface_new;
 	pscreen->tex_surface_destroy = nv40_miptree_surface_del;
-        pscreen->video_surface_create = nv40_video_surface_new;
-        pscreen->video_surface_destroy = nv40_video_surface_del;
 }
 
diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h
index 5754ca2328a..6436a232487 100644
--- a/src/gallium/drivers/nv40/nv40_state.h
+++ b/src/gallium/drivers/nv40/nv40_state.h
@@ -87,16 +87,4 @@ struct nv40_miptree {
 	} level[PIPE_MAX_TEXTURE_LEVELS];
 };
 
-struct nv40_video_surface {
-	struct pipe_video_surface base;
-	struct pipe_texture *tex;
-};
-
-
-static INLINE struct nv40_video_surface*
-nv40_video_surface(struct pipe_video_surface *sfc)
-{
-   return (struct nv40_video_surface*)sfc;
-}
-
 #endif
-- 
cgit v1.2.3


From f64d0cf524b2203e648a060366a2e4220096aa1f Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 24 Apr 2010 15:37:25 -0400
Subject: vl: Adapt to dri changes.

---
 src/gallium/drivers/nv40/nv40_screen.c         |  2 ++
 src/gallium/drivers/nv40/nv40_video_context.c  | 15 ++++++++++-----
 src/gallium/drivers/nv40/nv40_video_context.h  |  9 +++------
 src/gallium/include/pipe/p_screen.h            |  9 +++++++--
 src/gallium/winsys/g3dvl/dri/dri_winsys.c      | 12 ++++++------
 src/gallium/winsys/g3dvl/drm/Makefile.template | 10 ++--------
 src/gallium/winsys/g3dvl/drm/nouveau/Makefile  |  3 ---
 7 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
index b216c5e38c9..779b7cdcb6f 100644
--- a/src/gallium/drivers/nv40/nv40_screen.c
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -1,6 +1,7 @@
 #include "pipe/p_screen.h"
 
 #include "nv40_context.h"
+#include "nv40_video_context.h"
 #include "nv40_screen.h"
 
 #define NV4X_GRCLASS4097_CHIPSETS 0x00000baf
@@ -199,6 +200,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	pscreen->get_paramf = nv40_screen_get_paramf;
 	pscreen->is_format_supported = nv40_screen_surface_format_supported;
 	pscreen->context_create = nv40_create;
+	pscreen->video_context_create = nv40_video_create;
 
 	nv40_screen_init_miptree_functions(pscreen);
 	nv40_screen_init_transfer_functions(pscreen);
diff --git a/src/gallium/drivers/nv40/nv40_video_context.c b/src/gallium/drivers/nv40/nv40_video_context.c
index e2985e5a9c8..15a26ea3b3b 100644
--- a/src/gallium/drivers/nv40/nv40_video_context.c
+++ b/src/gallium/drivers/nv40/nv40_video_context.c
@@ -28,14 +28,19 @@
 #include "nv40_video_context.h"
 #include <softpipe/sp_video_context.h>
 
-
 struct pipe_video_context *
-nv40_video_create(struct pipe_context *pipe, enum pipe_video_profile profile,
+nv40_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                   enum pipe_video_chroma_format chroma_format,
-                  unsigned width, unsigned height,
-                  unsigned pvctx_id)
+                  unsigned width, unsigned height, void *priv)
 {
-   assert(pipe);
+   struct pipe_context *pipe;
+
+   assert(screen);
+
+   pipe = screen->context_create(screen, priv);
+   if (!pipe)
+      return NULL;
+
    return sp_video_create_ex(pipe, profile, chroma_format, width, height,
                              VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
                              VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
diff --git a/src/gallium/drivers/nv40/nv40_video_context.h b/src/gallium/drivers/nv40/nv40_video_context.h
index 206a342e35e..64196caca72 100644
--- a/src/gallium/drivers/nv40/nv40_video_context.h
+++ b/src/gallium/drivers/nv40/nv40_video_context.h
@@ -30,12 +30,9 @@
 
 #include <pipe/p_video_context.h>
 
-struct pipe_context;
-
-struct pipe_video_context*
-nv40_video_create(struct pipe_context *pipe, enum pipe_video_profile profile,
+struct pipe_video_context *
+nv40_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                   enum pipe_video_chroma_format chroma_format,
-                  unsigned width, unsigned height,
-                  unsigned pvctx_id);
+                  unsigned width, unsigned height, void *priv);
 
 #endif
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index b8e007ec8ac..cf6a9c5e2f0 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -86,8 +86,13 @@ struct pipe_screen {
    float (*get_paramf)( struct pipe_screen *, int param );
 
    struct pipe_context * (*context_create)( struct pipe_screen *,
-					    void *priv );
-   
+                                            void *priv );
+
+   struct pipe_video_context * (*video_context_create)( struct pipe_screen *screen,
+                                                        enum pipe_video_profile profile,
+                                                        enum pipe_video_chroma_format chroma_format,
+                                                        unsigned width, unsigned height, void *priv );
+
    /**
     * Check if the given pipe_format is supported as a texture or
     * drawing surface.
diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index da54c0976f6..c8ff8fae169 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -358,18 +358,18 @@ vl_video_create(struct vl_screen *vscreen,
    /* XXX: Is default visual correct/sufficient here? */
    driCreateContext(vl_dri_scrn->dri_screen, vl_dri_scrn->visual, &vl_dri_ctx->dri_context);
 
-   if (!vl_dri_scrn->api->create_video_context) {
+   if (!vscreen->pscreen->video_context_create) {
       debug_printf("[G3DVL] No video support found on %s/%s.\n",
-                   vl_dri_scrn->base.pscreen->get_vendor(vl_dri_scrn->base.pscreen),
-                   vl_dri_scrn->base.pscreen->get_name(vl_dri_scrn->base.pscreen));
+                   vscreen->pscreen->get_vendor(vscreen->pscreen),
+                   vscreen->pscreen->get_name(vscreen->pscreen));
       FREE(vl_dri_ctx);
       return NULL;
    }
 
-   vl_dri_ctx->base.vpipe = vl_dri_scrn->api->create_video_context(vl_dri_scrn->api,
-                                                                   vscreen->pscreen,
+   vl_dri_ctx->base.vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen,
                                                                    profile, chroma_format,
-                                                                   width, height);
+                                                                   width, height,
+                                                                   vl_dri_ctx->dri_context);
 
    if (!vl_dri_ctx->base.vpipe) {
       FREE(vl_dri_ctx);
diff --git a/src/gallium/winsys/g3dvl/drm/Makefile.template b/src/gallium/winsys/g3dvl/drm/Makefile.template
index a33d95677c5..2b79deef4bc 100644
--- a/src/gallium/winsys/g3dvl/drm/Makefile.template
+++ b/src/gallium/winsys/g3dvl/drm/Makefile.template
@@ -15,7 +15,7 @@ INCLUDES = $(DRIVER_INCLUDES) \
 
 DEFINES += $(DRIVER_DEFINES) \
            -DGALLIUM_SOFTPIPE \
-	   -DGALLIUM_TRACE
+           -DGALLIUM_TRACE
 
 # XXX: Hack, if we include libxvmctracker.a in LIBS none of the symbols are
 # pulled in by the linker because xsp_winsys.c doesn't refer to them
@@ -23,13 +23,7 @@ OBJECTS = $(C_SOURCES:.c=.o) $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o
 
 LIBS = $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
        $(PIPE_DRIVERS) \
-       $(TOP)/src/gallium/auxiliary/vl/libvl.a \
-       $(TOP)/src/gallium/auxiliary/draw/libdraw.a \
-       $(TOP)/src/gallium/auxiliary/tgsi/libtgsi.a \
-       $(TOP)/src/gallium/auxiliary/translate/libtranslate.a \
-       $(TOP)/src/gallium/auxiliary/cso_cache/libcso_cache.a \
-       $(TOP)/src/gallium/auxiliary/rtasm/librtasm.a \
-       $(TOP)/src/gallium/auxiliary/util/libutil.a
+       $(TOP)/src/gallium/auxiliary/libgallium.a
 
 .c.o:
 	$(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
diff --git a/src/gallium/winsys/g3dvl/drm/nouveau/Makefile b/src/gallium/winsys/g3dvl/drm/nouveau/Makefile
index fe41ac9269e..5c55186f672 100644
--- a/src/gallium/winsys/g3dvl/drm/nouveau/Makefile
+++ b/src/gallium/winsys/g3dvl/drm/nouveau/Makefile
@@ -12,9 +12,6 @@ DRIVER_DEFINES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-other)
 
 PIPE_DRIVERS = \
 	$(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \
-	$(TOP)/src/gallium/drivers/nv04/libnv04.a \
-	$(TOP)/src/gallium/drivers/nv10/libnv10.a \
-	$(TOP)/src/gallium/drivers/nv20/libnv20.a \
 	$(TOP)/src/gallium/drivers/nv30/libnv30.a \
 	$(TOP)/src/gallium/drivers/nv40/libnv40.a \
 	$(TOP)/src/gallium/drivers/nv50/libnv50.a \
-- 
cgit v1.2.3


From 3107b54b011c7ceef2b314632bdcf0b87c5e4d36 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 24 Apr 2010 21:16:59 -0400
Subject: st/xvmc: Mark exported funcs with PUBLIC.

---
 src/gallium/state_trackers/xorg/xvmc/attributes.c |  4 ++++
 src/gallium/state_trackers/xorg/xvmc/block.c      |  4 ++++
 src/gallium/state_trackers/xorg/xvmc/context.c    |  2 ++
 src/gallium/state_trackers/xorg/xvmc/subpicture.c | 10 ++++++++++
 src/gallium/state_trackers/xorg/xvmc/surface.c    |  8 ++++++++
 5 files changed, 28 insertions(+)

diff --git a/src/gallium/state_trackers/xorg/xvmc/attributes.c b/src/gallium/state_trackers/xorg/xvmc/attributes.c
index 79a67838e6e..d23d8635b66 100644
--- a/src/gallium/state_trackers/xorg/xvmc/attributes.c
+++ b/src/gallium/state_trackers/xorg/xvmc/attributes.c
@@ -29,17 +29,21 @@
 #include <X11/Xlib.h>
 #include <X11/extensions/Xvlib.h>
 #include <X11/extensions/XvMClib.h>
+#include <pipe/p_compiler.h>
 
+PUBLIC
 XvAttribute* XvMCQueryAttributes(Display *dpy, XvMCContext *context, int *number)
 {
    return NULL;
 }
 
+PUBLIC
 Status XvMCSetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int value)
 {
    return BadImplementation;
 }
 
+PUBLIC
 Status XvMCGetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int *value)
 {
    return BadImplementation;
diff --git a/src/gallium/state_trackers/xorg/xvmc/block.c b/src/gallium/state_trackers/xorg/xvmc/block.c
index 5102375fcf8..c7da7a84a7b 100644
--- a/src/gallium/state_trackers/xorg/xvmc/block.c
+++ b/src/gallium/state_trackers/xorg/xvmc/block.c
@@ -31,6 +31,7 @@
 #include <util/u_memory.h>
 #include "xvmc_private.h"
 
+PUBLIC
 Status XvMCCreateBlocks(Display *dpy, XvMCContext *context, unsigned int num_blocks, XvMCBlockArray *blocks)
 {
    assert(dpy);
@@ -50,6 +51,7 @@ Status XvMCCreateBlocks(Display *dpy, XvMCContext *context, unsigned int num_blo
    return Success;
 }
 
+PUBLIC
 Status XvMCDestroyBlocks(Display *dpy, XvMCBlockArray *blocks)
 {
    assert(dpy);
@@ -59,6 +61,7 @@ Status XvMCDestroyBlocks(Display *dpy, XvMCBlockArray *blocks)
    return Success;
 }
 
+PUBLIC
 Status XvMCCreateMacroBlocks(Display *dpy, XvMCContext *context, unsigned int num_blocks, XvMCMacroBlockArray *blocks)
 {
    assert(dpy);
@@ -78,6 +81,7 @@ Status XvMCCreateMacroBlocks(Display *dpy, XvMCContext *context, unsigned int nu
    return Success;
 }
 
+PUBLIC
 Status XvMCDestroyMacroBlocks(Display *dpy, XvMCMacroBlockArray *blocks)
 {
    assert(dpy);
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index 586cc1f45ac..5e4af9e555a 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -172,6 +172,7 @@ static enum pipe_video_chroma_format FormatToPipe(int xvmc_format)
    return -1;
 }
 
+PUBLIC
 Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
                          int width, int height, int flags, XvMCContext *context)
 {
@@ -268,6 +269,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
    return Success;
 }
 
+PUBLIC
 Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
 {
    struct vl_screen *vscreen;
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index a6d75f63259..5bfccfaf37f 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -126,6 +126,7 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id, int xvi
    return i < num_subpics ? Success : BadMatch;
 }
 
+PUBLIC
 Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *subpicture,
                             unsigned short width, unsigned short height, int xvimage_id)
 {
@@ -203,6 +204,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    return Success;
 }
 
+PUBLIC
 Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, short y,
                            unsigned short width, unsigned short height, unsigned int color)
 {
@@ -224,6 +226,7 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh
    return Success;
 }
 
+PUBLIC
 Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage *image,
                                short srcx, short srcy, unsigned short width, unsigned short height,
                                short dstx, short dsty)
@@ -293,6 +296,7 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
    return Success;
 }
 
+PUBLIC
 Status XvMCDestroySubpicture(Display *dpy, XvMCSubpicture *subpicture)
 {
    XvMCSubpicturePrivate *subpicture_priv;
@@ -313,6 +317,7 @@ Status XvMCDestroySubpicture(Display *dpy, XvMCSubpicture *subpicture)
    return Success;
 }
 
+PUBLIC
 Status XvMCSetSubpicturePalette(Display *dpy, XvMCSubpicture *subpicture, unsigned char *palette)
 {
    assert(dpy);
@@ -326,6 +331,7 @@ Status XvMCSetSubpicturePalette(Display *dpy, XvMCSubpicture *subpicture, unsign
    return BadMatch;
 }
 
+PUBLIC
 Status XvMCBlendSubpicture(Display *dpy, XvMCSurface *target_surface, XvMCSubpicture *subpicture,
                            short subx, short suby, unsigned short subw, unsigned short subh,
                            short surfx, short surfy, unsigned short surfw, unsigned short surfh)
@@ -367,6 +373,7 @@ Status XvMCBlendSubpicture(Display *dpy, XvMCSurface *target_surface, XvMCSubpic
    return Success;
 }
 
+PUBLIC
 Status XvMCBlendSubpicture2(Display *dpy, XvMCSurface *source_surface, XvMCSurface *target_surface,
                             XvMCSubpicture *subpicture, short subx, short suby, unsigned short subw, unsigned short subh,
                             short surfx, short surfy, unsigned short surfw, unsigned short surfh)
@@ -390,6 +397,7 @@ Status XvMCBlendSubpicture2(Display *dpy, XvMCSurface *source_surface, XvMCSurfa
    return Success;
 }
 
+PUBLIC
 Status XvMCSyncSubpicture(Display *dpy, XvMCSubpicture *subpicture)
 {
    assert(dpy);
@@ -400,6 +408,7 @@ Status XvMCSyncSubpicture(Display *dpy, XvMCSubpicture *subpicture)
    return Success;
 }
 
+PUBLIC
 Status XvMCFlushSubpicture(Display *dpy, XvMCSubpicture *subpicture)
 {
    assert(dpy);
@@ -410,6 +419,7 @@ Status XvMCFlushSubpicture(Display *dpy, XvMCSubpicture *subpicture)
    return Success;
 }
 
+PUBLIC
 Status XvMCGetSubpictureStatus(Display *dpy, XvMCSubpicture *subpicture, int *status)
 {
    assert(dpy);
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 998a7af0e95..42df9e49ea5 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -184,6 +184,7 @@ MacroBlocksToPipe(struct pipe_screen *screen,
    }
 }
 
+PUBLIC
 Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surface)
 {
    XvMCContextPrivate *context_priv;
@@ -261,6 +262,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    return Success;
 }
 
+PUBLIC
 Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int picture_structure,
                          XvMCSurface *target_surface, XvMCSurface *past_surface, XvMCSurface *future_surface,
                          unsigned int flags, unsigned int num_macroblocks, unsigned int first_macroblock,
@@ -334,6 +336,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    return Success;
 }
 
+PUBLIC
 Status XvMCFlushSurface(Display *dpy, XvMCSurface *surface)
 {
    assert(dpy);
@@ -344,6 +347,7 @@ Status XvMCFlushSurface(Display *dpy, XvMCSurface *surface)
    return Success;
 }
 
+PUBLIC
 Status XvMCSyncSurface(Display *dpy, XvMCSurface *surface)
 {
    assert(dpy);
@@ -354,6 +358,7 @@ Status XvMCSyncSurface(Display *dpy, XvMCSurface *surface)
    return Success;
 }
 
+PUBLIC
 Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
                       short srcx, short srcy, unsigned short srcw, unsigned short srch,
                       short destx, short desty, unsigned short destw, unsigned short desth,
@@ -442,6 +447,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    return Success;
 }
 
+PUBLIC
 Status XvMCGetSurfaceStatus(Display *dpy, XvMCSurface *surface, int *status)
 {
    assert(dpy);
@@ -456,6 +462,7 @@ Status XvMCGetSurfaceStatus(Display *dpy, XvMCSurface *surface, int *status)
    return Success;
 }
 
+PUBLIC
 Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
 {
    XvMCSurfacePrivate *surface_priv;
@@ -477,6 +484,7 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
    return Success;
 }
 
+PUBLIC
 Status XvMCHideSurface(Display *dpy, XvMCSurface *surface)
 {
    assert(dpy);
-- 
cgit v1.2.3


From 404fb63b4649f58fce443615e49337d42b8ddece Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Wed, 28 Apr 2010 19:51:10 -0400
Subject: vl: WIP DRI2 support in the winsys.

---
 src/gallium/winsys/g3dvl/dri/Makefile     |   1 +
 src/gallium/winsys/g3dvl/dri/dri2.c       | 669 ++++++++++++++++++++++++++++++
 src/gallium/winsys/g3dvl/dri/dri2.h       | 106 +++++
 src/gallium/winsys/g3dvl/dri/dri_winsys.c | 145 +++++--
 src/gallium/winsys/g3dvl/dri/driclient.c  |  71 +++-
 src/gallium/winsys/g3dvl/dri/driclient.h  |  19 +
 6 files changed, 979 insertions(+), 32 deletions(-)
 create mode 100644 src/gallium/winsys/g3dvl/dri/dri2.c
 create mode 100644 src/gallium/winsys/g3dvl/dri/dri2.h

diff --git a/src/gallium/winsys/g3dvl/dri/Makefile b/src/gallium/winsys/g3dvl/dri/Makefile
index dcd58ef6d2e..b270e780fec 100644
--- a/src/gallium/winsys/g3dvl/dri/Makefile
+++ b/src/gallium/winsys/g3dvl/dri/Makefile
@@ -9,6 +9,7 @@ LIBRARY_INCLUDES = -I$(TOP)/src/gallium/winsys/g3dvl \
 C_SOURCES = \
 	driclient.c \
 	XF86dri.c \
+	dri2.c \
 	dri_winsys.c
 
 include ../../../Makefile.template
diff --git a/src/gallium/winsys/g3dvl/dri/dri2.c b/src/gallium/winsys/g3dvl/dri/dri2.c
new file mode 100644
index 00000000000..23fb7780882
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/dri/dri2.c
@@ -0,0 +1,669 @@
+/*
+ * Copyright © 2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Soft-
+ * ware"), to deal in the Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge, publish, distribute,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, provided that the above copyright
+ * notice(s) and this permission notice appear in all copies of the Soft-
+ * ware and that both the above copyright notice(s) and this permission
+ * notice appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+ * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY
+ * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN
+ * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE-
+ * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR-
+ * MANCE OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder shall
+ * not be used in advertising or otherwise to promote the sale, use or
+ * other dealings in this Software without prior written authorization of
+ * the copyright holder.
+ *
+ * Authors:
+ *   Kristian Høgsberg (krh@redhat.com)
+ */
+
+
+#define NEED_REPLIES
+#include <stdio.h>
+#include <X11/Xlibint.h>
+#include <X11/extensions/Xext.h>
+#include <X11/extensions/extutil.h>
+#include <X11/extensions/dri2proto.h>
+#include "xf86drm.h"
+#include "dri2.h"
+#if 0
+#include "glxclient.h"
+#include "GL/glxext.h"
+#endif
+
+/* Allow the build to work with older versions of dri2proto.h and
+ * dri2tokens.h.
+ */
+#if DRI2_MINOR < 1
+#undef DRI2_MINOR
+#define DRI2_MINOR 1
+#define X_DRI2GetBuffersWithFormat 7
+#endif
+
+
+static char dri2ExtensionName[] = DRI2_NAME;
+static XExtensionInfo *dri2Info;
+static XEXT_GENERATE_CLOSE_DISPLAY (DRI2CloseDisplay, dri2Info)
+
+static Bool
+DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire);
+static Status
+DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire);
+
+static /* const */ XExtensionHooks dri2ExtensionHooks = {
+  NULL,                   /* create_gc */
+  NULL,                   /* copy_gc */
+  NULL,                   /* flush_gc */
+  NULL,                   /* free_gc */
+  NULL,                   /* create_font */
+  NULL,                   /* free_font */
+  DRI2CloseDisplay,       /* close_display */
+  DRI2WireToEvent,        /* wire_to_event */
+  DRI2EventToWire,        /* event_to_wire */
+  NULL,                   /* error */
+  NULL,                   /* error_string */
+};
+
+static XEXT_GENERATE_FIND_DISPLAY (DRI2FindDisplay,
+                                   dri2Info,
+                                   dri2ExtensionName,
+                                   &dri2ExtensionHooks,
+                                   0, NULL)
+
+static Bool
+DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire)
+{
+#if 0
+   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+   XExtDisplayInfo *glx_info = __glXFindDisplay(dpy);
+
+   XextCheckExtension(dpy, info, dri2ExtensionName, False);
+
+   switch ((wire->u.u.type & 0x7f) - info->codes->first_event) {
+
+#ifdef X_DRI2SwapBuffers
+   case DRI2_BufferSwapComplete:
+   {
+      GLXBufferSwapComplete *aevent = (GLXBufferSwapComplete *)event;
+      xDRI2BufferSwapComplete *awire = (xDRI2BufferSwapComplete *)wire;
+      aevent->serial = _XSetLastRequestRead(dpy, (xGenericReply *) wire);
+      aevent->type =
+	  (glx_info->codes->first_event + GLX_BufferSwapComplete) & 0x75;
+      aevent->send_event = (awire->type & 0x80) != 0;
+      aevent->display = dpy;
+      aevent->drawable = awire->drawable;
+      switch (awire->event_type) {
+      case DRI2_EXCHANGE_COMPLETE:
+	 aevent->event_type = GLX_EXCHANGE_COMPLETE_INTEL;
+	 break;
+      case DRI2_BLIT_COMPLETE:
+	 aevent->event_type = GLX_BLIT_COMPLETE_INTEL;
+	 break;
+      case DRI2_FLIP_COMPLETE:
+	 aevent->event_type = GLX_FLIP_COMPLETE_INTEL;
+	 break;
+      default:
+	 /* unknown swap completion type */
+	 return False;
+      }
+      aevent->ust = ((CARD64)awire->ust_hi << 32) | awire->ust_lo;
+      aevent->msc = ((CARD64)awire->msc_hi << 32) | awire->msc_lo;
+      aevent->sbc = ((CARD64)awire->sbc_hi << 32) | awire->sbc_lo;
+      return True;
+   }
+#endif
+#ifdef DRI2_InvalidateBuffers
+   case DRI2_InvalidateBuffers:
+   {
+      xDRI2InvalidateBuffers *awire = (xDRI2InvalidateBuffers *)wire;
+
+      dri2InvalidateBuffers(dpy, awire->drawable);
+      return False;
+   }
+#endif
+   default:
+      /* client doesn't support server event */
+      break;
+   }
+#endif
+   return False;
+}
+
+/* We don't actually support this.  It doesn't make sense for clients to
+ * send each other DRI2 events.
+ */
+static Status
+DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire)
+{
+   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+
+   XextCheckExtension(dpy, info, dri2ExtensionName, False);
+
+   switch (event->type) {
+   default:
+      /* client doesn't support server event */
+      break;
+   }
+
+   return Success;
+}
+
+Bool
+DRI2QueryExtension(Display * dpy, int *eventBase, int *errorBase)
+{
+   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+
+   if (XextHasExtension(info)) {
+      *eventBase = info->codes->first_event;
+      *errorBase = info->codes->first_error;
+      return True;
+   }
+
+   return False;
+}
+
+Bool
+DRI2QueryVersion(Display * dpy, int *major, int *minor)
+{
+   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+   xDRI2QueryVersionReply rep;
+   xDRI2QueryVersionReq *req;
+   int i, nevents;
+
+   XextCheckExtension(dpy, info, dri2ExtensionName, False);
+
+   LockDisplay(dpy);
+   GetReq(DRI2QueryVersion, req);
+   req->reqType = info->codes->major_opcode;
+   req->dri2ReqType = X_DRI2QueryVersion;
+   req->majorVersion = DRI2_MAJOR;
+   req->minorVersion = DRI2_MINOR;
+   if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
+      UnlockDisplay(dpy);
+      SyncHandle();
+      return False;
+   }
+   *major = rep.majorVersion;
+   *minor = rep.minorVersion;
+   UnlockDisplay(dpy);
+   SyncHandle();
+
+   switch (rep.minorVersion) {
+   case 1:
+	   nevents = 0;
+	   break;
+   case 2:
+	   nevents = 1;
+	   break;
+   case 3:
+   default:
+	   nevents = 2;
+	   break;
+   }
+	
+   for (i = 0; i < nevents; i++) {
+       XESetWireToEvent (dpy, info->codes->first_event + i, DRI2WireToEvent);
+       XESetEventToWire (dpy, info->codes->first_event + i, DRI2EventToWire);
+   }
+
+   return True;
+}
+
+Bool
+DRI2Connect(Display * dpy, XID window, char **driverName, char **deviceName)
+{
+   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+   xDRI2ConnectReply rep;
+   xDRI2ConnectReq *req;
+
+   XextCheckExtension(dpy, info, dri2ExtensionName, False);
+
+   LockDisplay(dpy);
+   GetReq(DRI2Connect, req);
+   req->reqType = info->codes->major_opcode;
+   req->dri2ReqType = X_DRI2Connect;
+   req->window = window;
+   req->driverType = DRI2DriverDRI;
+   if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
+      UnlockDisplay(dpy);
+      SyncHandle();
+      return False;
+   }
+
+   if (rep.driverNameLength == 0 && rep.deviceNameLength == 0) {
+      UnlockDisplay(dpy);
+      SyncHandle();
+      return False;
+   }
+
+   *driverName = Xmalloc(rep.driverNameLength + 1);
+   if (*driverName == NULL) {
+      _XEatData(dpy,
+                ((rep.driverNameLength + 3) & ~3) +
+                ((rep.deviceNameLength + 3) & ~3));
+      UnlockDisplay(dpy);
+      SyncHandle();
+      return False;
+   }
+   _XReadPad(dpy, *driverName, rep.driverNameLength);
+   (*driverName)[rep.driverNameLength] = '\0';
+
+   *deviceName = Xmalloc(rep.deviceNameLength + 1);
+   if (*deviceName == NULL) {
+      Xfree(*driverName);
+      _XEatData(dpy, ((rep.deviceNameLength + 3) & ~3));
+      UnlockDisplay(dpy);
+      SyncHandle();
+      return False;
+   }
+   _XReadPad(dpy, *deviceName, rep.deviceNameLength);
+   (*deviceName)[rep.deviceNameLength] = '\0';
+
+   UnlockDisplay(dpy);
+   SyncHandle();
+
+   return True;
+}
+
+Bool
+DRI2Authenticate(Display * dpy, XID window, drm_magic_t magic)
+{
+   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+   xDRI2AuthenticateReq *req;
+   xDRI2AuthenticateReply rep;
+
+   XextCheckExtension(dpy, info, dri2ExtensionName, False);
+
+   LockDisplay(dpy);
+   GetReq(DRI2Authenticate, req);
+   req->reqType = info->codes->major_opcode;
+   req->dri2ReqType = X_DRI2Authenticate;
+   req->window = window;
+   req->magic = magic;
+
+   if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
+      UnlockDisplay(dpy);
+      SyncHandle();
+      return False;
+   }
+
+   UnlockDisplay(dpy);
+   SyncHandle();
+
+   return rep.authenticated;
+}
+
+void
+DRI2CreateDrawable(Display * dpy, XID drawable)
+{
+   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+   xDRI2CreateDrawableReq *req;
+
+   XextSimpleCheckExtension(dpy, info, dri2ExtensionName);
+
+   LockDisplay(dpy);
+   GetReq(DRI2CreateDrawable, req);
+   req->reqType = info->codes->major_opcode;
+   req->dri2ReqType = X_DRI2CreateDrawable;
+   req->drawable = drawable;
+   UnlockDisplay(dpy);
+   SyncHandle();
+}
+
+void
+DRI2DestroyDrawable(Display * dpy, XID drawable)
+{
+   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+   xDRI2DestroyDrawableReq *req;
+
+   XextSimpleCheckExtension(dpy, info, dri2ExtensionName);
+
+   XSync(dpy, False);
+
+   LockDisplay(dpy);
+   GetReq(DRI2DestroyDrawable, req);
+   req->reqType = info->codes->major_opcode;
+   req->dri2ReqType = X_DRI2DestroyDrawable;
+   req->drawable = drawable;
+   UnlockDisplay(dpy);
+   SyncHandle();
+}
+
+/* Send X_DRI2GetBuffers for `count` attachments and return the reply as an
+ * Xmalloc'ed DRI2Buffer array that the caller must release; NULL on failure. */
+DRI2Buffer *
+DRI2GetBuffers(Display * dpy, XID drawable,
+               int *width, int *height,
+               unsigned int *attachments, int count, int *outCount)
+{
+   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+   xDRI2GetBuffersReply rep;
+   xDRI2GetBuffersReq *req;
+   DRI2Buffer *buffers;
+   xDRI2Buffer repBuffer;
+   CARD32 *p;
+   int i;
+
+   XextCheckExtension(dpy, info, dri2ExtensionName, False);
+
+   LockDisplay(dpy);
+   GetReqExtra(DRI2GetBuffers, count * 4, req);   /* 4 extra bytes per attachment */
+   req->reqType = info->codes->major_opcode;
+   req->dri2ReqType = X_DRI2GetBuffers;
+   req->drawable = drawable;
+   req->count = count;
+   p = (CARD32 *) & req[1];   /* attachment words start right after the fixed request */
+   for (i = 0; i < count; i++)
+      p[i] = attachments[i];
+
+   if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
+      UnlockDisplay(dpy);
+      SyncHandle();
+      return NULL;
+   }
+
+   *width = rep.width;
+   *height = rep.height;
+   *outCount = rep.count;   /* written even if the allocation below fails */
+
+   buffers = Xmalloc(rep.count * sizeof buffers[0]);
+   if (buffers == NULL) {
+      _XEatData(dpy, rep.count * sizeof repBuffer);   /* skip the unread buffer records */
+      UnlockDisplay(dpy);
+      SyncHandle();
+      return NULL;
+   }
+   /* Read one xDRI2Buffer record at a time and copy its fields out. */
+   for (i = 0; i < rep.count; i++) {
+      _XReadPad(dpy, (char *) &repBuffer, sizeof repBuffer);
+      buffers[i].attachment = repBuffer.attachment;
+      buffers[i].name = repBuffer.name;
+      buffers[i].pitch = repBuffer.pitch;
+      buffers[i].cpp = repBuffer.cpp;
+      buffers[i].flags = repBuffer.flags;
+   }
+   UnlockDisplay(dpy);
+   SyncHandle();
+   return buffers;
+}
+
+
+/* Send X_DRI2GetBuffersWithFormat and return the reply as an Xmalloc'ed
+ * DRI2Buffer array that the caller must release; NULL on failure. */
+DRI2Buffer *
+DRI2GetBuffersWithFormat(Display * dpy, XID drawable,
+                         int *width, int *height,
+                         unsigned int *attachments, int count, int *outCount)
+{
+   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+   xDRI2GetBuffersReply rep;
+   xDRI2GetBuffersReq *req;
+   DRI2Buffer *buffers;
+   xDRI2Buffer repBuffer;
+   CARD32 *p;
+   int i;
+
+   XextCheckExtension(dpy, info, dri2ExtensionName, False);
+
+   LockDisplay(dpy);
+   /* Reuses the GetBuffers request layout, with two words per attachment. */
+   GetReqExtra(DRI2GetBuffers, count * (4 * 2), req);
+   req->reqType = info->codes->major_opcode;
+   req->dri2ReqType = X_DRI2GetBuffersWithFormat;
+   req->drawable = drawable;
+   req->count = count;
+   p = (CARD32 *) & req[1];
+   /* Reads 2 * count entries; presumably (attachment, format) pairs - confirm. */
+   for (i = 0; i < (count * 2); i++)
+      p[i] = attachments[i];
+
+   if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
+      UnlockDisplay(dpy);
+      SyncHandle();
+      return NULL;
+   }
+
+   *width = rep.width;
+   *height = rep.height;
+   *outCount = rep.count;   /* written even if the allocation below fails */
+
+   buffers = Xmalloc(rep.count * sizeof buffers[0]);
+   if (buffers == NULL) {
+      _XEatData(dpy, rep.count * sizeof repBuffer);   /* skip the unread buffer records */
+      UnlockDisplay(dpy);
+      SyncHandle();
+      return NULL;
+   }
+   for (i = 0; i < rep.count; i++) {
+      _XReadPad(dpy, (char *) &repBuffer, sizeof repBuffer);
+      buffers[i].attachment = repBuffer.attachment;
+      buffers[i].name = repBuffer.name;
+      buffers[i].pitch = repBuffer.pitch;
+      buffers[i].cpp = repBuffer.cpp;
+      buffers[i].flags = repBuffer.flags;
+   }
+   UnlockDisplay(dpy);
+   SyncHandle();
+   return buffers;
+}
+
+
+/* Issue an X_DRI2CopyRegion request and wait for its reply (contents unused). */
+void
+DRI2CopyRegion(Display * dpy, XID drawable, XserverRegion region,
+               CARD32 dest, CARD32 src)
+{
+   xDRI2CopyRegionReply reply;
+   xDRI2CopyRegionReq *request;
+   XExtDisplayInfo *ext = DRI2FindDisplay(dpy);
+
+   XextSimpleCheckExtension(dpy, ext, dri2ExtensionName);
+   LockDisplay(dpy);
+   GetReq(DRI2CopyRegion, request);
+   request->src = src;
+   request->dest = dest;
+   request->region = region;
+   request->drawable = drawable;
+   request->dri2ReqType = X_DRI2CopyRegion;
+   request->reqType = ext->codes->major_opcode;
+
+   /* The return value and the reply fields are deliberately not inspected. */
+   _XReply(dpy, (xReply *) & reply, 0, xFalse);
+   UnlockDisplay(dpy);
+   SyncHandle();
+}
+
+#ifdef X_DRI2SwapBuffers
+/* Store each 64-bit swap parameter as a hi/lo pair of request fields. */
+static void load_swap_req(xDRI2SwapBuffersReq *req, CARD64 target,
+			  CARD64 divisor, CARD64 remainder)
+{
+    req->target_msc_lo = target & 0xffffffff;
+    req->target_msc_hi = target >> 32;
+    req->divisor_lo = divisor & 0xffffffff;
+    req->divisor_hi = divisor >> 32;
+    req->remainder_lo = remainder & 0xffffffff;
+    req->remainder_hi = remainder >> 32;
+}
+
+static CARD64
+vals_to_card64(CARD32 lo, CARD32 hi)
+{
+    return ((CARD64)hi << 32) | (CARD64)lo;   /* hi becomes bits 63..32, lo bits 31..0 */
+}
+
+/* Send X_DRI2SwapBuffers; *count is written only when the server replies. */
+void DRI2SwapBuffers(Display *dpy, XID drawable, CARD64 target_msc,
+		     CARD64 divisor, CARD64 remainder, CARD64 *count)
+{
+    XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+    xDRI2SwapBuffersReq *req;
+    xDRI2SwapBuffersReply rep;
+
+    XextSimpleCheckExtension (dpy, info, dri2ExtensionName);
+
+    LockDisplay(dpy);
+    GetReq(DRI2SwapBuffers, req);
+    req->reqType = info->codes->major_opcode;
+    req->dri2ReqType = X_DRI2SwapBuffers;
+    req->drawable = drawable;
+    load_swap_req(req, target_msc, divisor, remainder);
+
+    /* On a failed reply `rep` is indeterminate, so leave *count untouched. */
+    if (_XReply(dpy, (xReply *)&rep, 0, xFalse))
+	*count = vals_to_card64(rep.swap_lo, rep.swap_hi);
+    UnlockDisplay(dpy);
+    SyncHandle();
+}
+#endif
+
+#ifdef X_DRI2GetMSC
+/* Send X_DRI2GetMSC and store the reply's ust/msc/sbc; False if the reply fails. */
+Bool DRI2GetMSC(Display *dpy, XID drawable, CARD64 *ust, CARD64 *msc,
+		CARD64 *sbc)
+{
+    XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+    xDRI2GetMSCReq *req;
+    xDRI2MSCReply rep;
+
+    XextCheckExtension (dpy, info, dri2ExtensionName, False);
+
+    LockDisplay(dpy);
+    GetReq(DRI2GetMSC, req);
+    req->reqType = info->codes->major_opcode;
+    req->dri2ReqType = X_DRI2GetMSC;
+    req->drawable = drawable;
+
+    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
+	UnlockDisplay(dpy);
+	SyncHandle();
+	return False;   /* outputs are left unwritten on this path */
+    }
+
+    *ust = vals_to_card64(rep.ust_lo, rep.ust_hi);
+    *msc = vals_to_card64(rep.msc_lo, rep.msc_hi);
+    *sbc = vals_to_card64(rep.sbc_lo, rep.sbc_hi);
+
+    UnlockDisplay(dpy);
+    SyncHandle();
+    return True;
+}
+#endif
+
+#ifdef X_DRI2WaitMSC
+/* Store each 64-bit wait parameter as a hi/lo pair of request fields. */
+static void load_msc_req(xDRI2WaitMSCReq *req, CARD64 target,
+			 CARD64 divisor, CARD64 remainder)
+{
+    req->target_msc_lo = target & 0xffffffff;
+    req->target_msc_hi = target >> 32;
+    req->divisor_lo = divisor & 0xffffffff;
+    req->divisor_hi = divisor >> 32;
+    req->remainder_lo = remainder & 0xffffffff;
+    req->remainder_hi = remainder >> 32;
+}
+
+/* Send X_DRI2WaitMSC and store the reply's ust/msc/sbc; False if the reply fails. */
+Bool DRI2WaitMSC(Display *dpy, XID drawable, CARD64 target_msc, CARD64 divisor,
+		 CARD64 remainder, CARD64 *ust, CARD64 *msc, CARD64 *sbc)
+{
+    XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+    xDRI2WaitMSCReq *req;
+    xDRI2MSCReply rep;
+
+    XextCheckExtension (dpy, info, dri2ExtensionName, False);
+
+    LockDisplay(dpy);
+    GetReq(DRI2WaitMSC, req);
+    req->reqType = info->codes->major_opcode;
+    req->dri2ReqType = X_DRI2WaitMSC;
+    req->drawable = drawable;
+    load_msc_req(req, target_msc, divisor, remainder);
+
+    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
+	UnlockDisplay(dpy);
+	SyncHandle();
+	return False;   /* outputs are left unwritten on this path */
+    }
+
+    *ust = ((CARD64)rep.ust_hi << 32) | (CARD64)rep.ust_lo;
+    *msc = ((CARD64)rep.msc_hi << 32) | (CARD64)rep.msc_lo;
+    *sbc = ((CARD64)rep.sbc_hi << 32) | (CARD64)rep.sbc_lo;
+
+    UnlockDisplay(dpy);
+    SyncHandle();
+    return True;
+}
+#endif
+
+#ifdef X_DRI2WaitSBC
+/* Store the 64-bit `target` as the request's hi/lo pair of fields. */
+static void load_sbc_req(xDRI2WaitSBCReq *req, CARD64 target)
+{
+    req->target_sbc_lo = target & 0xffffffff;
+    req->target_sbc_hi = target >> 32;
+}
+
+/* Send X_DRI2WaitSBC and store the reply's ust/msc/sbc; False if the reply fails. */
+Bool DRI2WaitSBC(Display *dpy, XID drawable, CARD64 target_sbc, CARD64 *ust,
+		 CARD64 *msc, CARD64 *sbc)
+{
+    XExtDisplayInfo *info = DRI2FindDisplay(dpy);
+    xDRI2WaitSBCReq *req;
+    xDRI2MSCReply rep;
+
+    XextCheckExtension (dpy, info, dri2ExtensionName, False);
+
+    LockDisplay(dpy);
+    GetReq(DRI2WaitSBC, req);
+    req->reqType = info->codes->major_opcode;
+    req->dri2ReqType = X_DRI2WaitSBC;
+    req->drawable = drawable;
+    load_sbc_req(req, target_sbc);
+
+    if (!_XReply(dpy, (xReply *)&rep, 0, xFalse)) {
+	UnlockDisplay(dpy);
+	SyncHandle();
+	return False;   /* outputs are left unwritten on this path */
+    }
+
+    *ust = ((CARD64)rep.ust_hi << 32) | rep.ust_lo;
+    *msc = ((CARD64)rep.msc_hi << 32) | rep.msc_lo;
+    *sbc = ((CARD64)rep.sbc_hi << 32) | rep.sbc_lo;
+
+    UnlockDisplay(dpy);
+    SyncHandle();
+    return True;
+}
+#endif
+
+#ifdef X_DRI2SwapInterval
+/* Issue an X_DRI2SwapInterval request carrying `interval`; no reply is read. */
+void DRI2SwapInterval(Display *dpy, XID drawable, int interval)
+{
+    xDRI2SwapIntervalReq *request;
+    XExtDisplayInfo *ext = DRI2FindDisplay(dpy);
+
+    XextSimpleCheckExtension (dpy, ext, dri2ExtensionName);
+    LockDisplay(dpy);
+    GetReq(DRI2SwapInterval, request);
+    request->interval = interval;
+    request->drawable = drawable;
+    request->dri2ReqType = X_DRI2SwapInterval;
+    request->reqType = ext->codes->major_opcode;
+    UnlockDisplay(dpy);
+    SyncHandle();
+}
+#endif
diff --git a/src/gallium/winsys/g3dvl/dri/dri2.h b/src/gallium/winsys/g3dvl/dri/dri2.h
new file mode 100644
index 00000000000..114e9f8f965
--- /dev/null
+++ b/src/gallium/winsys/g3dvl/dri/dri2.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright © 2007,2008 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Soft-
+ * ware"), to deal in the Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge, publish, distribute,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, provided that the above copyright
+ * notice(s) and this permission notice appear in all copies of the Soft-
+ * ware and that both the above copyright notice(s) and this permission
+ * notice appear in supporting documentation.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+ * ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY
+ * RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN
+ * THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSE-
+ * QUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFOR-
+ * MANCE OF THIS SOFTWARE.
+ *
+ * Except as contained in this notice, the name of a copyright holder shall
+ * not be used in advertising or otherwise to promote the sale, use or
+ * other dealings in this Software without prior written authorization of
+ * the copyright holder.
+ *
+ * Authors:
+ *   Kristian Høgsberg (krh@redhat.com)
+ */
+
+#ifndef _DRI2_H_
+#define _DRI2_H_
+
+#include <X11/extensions/Xfixes.h>
+#include <X11/extensions/dri2tokens.h>
+
+typedef struct
+{
+   unsigned int attachment;   /* which attachment this entry describes */
+   unsigned int name;         /* server-supplied buffer name; presumably a kernel buffer handle - confirm */
+   unsigned int pitch;        /* presumably bytes per row - confirm */
+   unsigned int cpp;          /* presumably bytes per pixel - confirm */
+   unsigned int flags;
+} DRI2Buffer;
+
+extern Bool
+DRI2QueryExtension(Display * display, int *eventBase, int *errorBase);
+
+extern Bool
+DRI2QueryVersion(Display * display, int *major, int *minor);
+
+extern Bool
+DRI2Connect(Display * display, XID window,
+            char **driverName, char **deviceName);
+
+extern Bool
+DRI2Authenticate(Display * display, XID window, drm_magic_t magic);
+
+extern void
+DRI2CreateDrawable(Display * display, XID drawable);
+
+extern void
+DRI2DestroyDrawable(Display * display, XID handle);
+
+extern DRI2Buffer*
+DRI2GetBuffers(Display * dpy, XID drawable,
+               int *width, int *height,
+               unsigned int *attachments, int count,
+               int *outCount);
+
+/**
+ * \note
+ * This function is only supported with DRI2 version 1.1 or later.
+ */
+extern DRI2Buffer*
+DRI2GetBuffersWithFormat(Display * dpy, XID drawable,
+                         int *width, int *height,
+                         unsigned int *attachments,
+                         int count, int *outCount);
+
+extern void
+DRI2CopyRegion(Display * dpy, XID drawable,
+               XserverRegion region,
+               CARD32 dest, CARD32 src);
+
+extern void
+DRI2SwapBuffers(Display *dpy, XID drawable, CARD64 target_msc, CARD64 divisor,
+		CARD64 remainder, CARD64 *count);
+
+extern Bool
+DRI2GetMSC(Display *dpy, XID drawable, CARD64 *ust, CARD64 *msc, CARD64 *sbc);
+
+extern Bool
+DRI2WaitMSC(Display *dpy, XID drawable, CARD64 target_msc, CARD64 divisor,
+	    CARD64 remainder, CARD64 *ust, CARD64 *msc, CARD64 *sbc);
+
+extern Bool
+DRI2WaitSBC(Display *dpy, XID drawable, CARD64 target_sbc, CARD64 *ust,
+	    CARD64 *msc, CARD64 *sbc);
+
+extern void
+DRI2SwapInterval(Display *dpy, XID drawable, int interval);
+
+#endif
diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index c8ff8fae169..893152c5146 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -40,6 +40,7 @@ struct vl_dri_screen
    dri_screen_t *dri_screen;
    dri_framebuffer_t dri_framebuf;
    struct dri1_api *api_hooks;
+   boolean dri2;
 };
 
 struct vl_dri_context
@@ -52,6 +53,7 @@ struct vl_dri_context
    int fd;
    struct pipe_video_context *vpipe;
    dri_drawable_t *drawable;
+   struct pipe_surface *dri2_front;
 };
 
 static void
@@ -171,11 +173,11 @@ vl_dri_intersect_src_bbox(struct drm_clip_rect *dst, int dst_x, int dst_y,
 
 static void
 vl_clip_copy(struct vl_dri_context *vl_dri_ctx,
-	     struct pipe_surface *dst,
-	     struct pipe_surface *src,
-	     const struct drm_clip_rect *src_bbox)
+             struct pipe_surface *dst,
+             struct pipe_surface *src,
+             const struct drm_clip_rect *src_bbox)
 {
-   struct pipe_video_context *vpipe = vl_dri_ctx->base.vpipe;
+   struct pipe_video_context *vpipe;
    struct drm_clip_rect clip;
    struct drm_clip_rect *cur;
    int i;
@@ -185,6 +187,8 @@ vl_clip_copy(struct vl_dri_context *vl_dri_ctx,
    assert(src);
    assert(src_bbox);
 
+   vpipe = vl_dri_ctx->base.vpipe;
+
    assert(vl_dri_ctx->drawable->cliprects);
    assert(vl_dri_ctx->drawable->num_cliprects > 0);
 
@@ -268,6 +272,56 @@ vl_dri_flush_frontbuffer(struct pipe_screen *screen,
    vl_dri_unlock(vl_dri_ctx);
 }
 
+/* Wrap the drawable's DRI2 front-left buffer in a pipe_surface; returns NULL on failure. */
+static struct pipe_surface*
+vl_dri2_get_front(struct vl_dri_screen *vl_dri_scrn, Drawable drawable)
+{
+   int w, h, count;
+   unsigned int attachments[1] = {DRI_BUFFER_FRONT_LEFT};
+   DRI2Buffer *dri2_front;
+   /* Zeroed so no indeterminate fields reach the driver. NOTE(review): still needs real format/size values. */
+   struct pipe_texture template = {0}, *front_tex;
+   struct pipe_surface *front_surf = NULL;
+
+   assert(vl_dri_scrn);
+   dri2_front = DRI2GetBuffers(vl_dri_scrn->dri_screen->display,
+                               drawable, &w, &h, attachments, 1, &count);
+   if (dri2_front) {
+      front_tex = vl_dri_scrn->api->texture_from_shared_handle(vl_dri_scrn->api, vl_dri_scrn->base.pscreen,
+                                                               &template, "", dri2_front->pitch, dri2_front->name);
+      if (front_tex)
+         front_surf = vl_dri_scrn->base.pscreen->get_tex_surface(vl_dri_scrn->base.pscreen,
+                                                                 front_tex, 0, 0, 0,
+                                                                 PIPE_BUFFER_USAGE_GPU_READ_WRITE);
+      pipe_texture_reference(&front_tex, NULL);
+      XFree(dri2_front); /* DRI2GetBuffers() returns an Xmalloc'ed array that the caller owns */
+   }
+   return front_surf;
+}
+
+/* flush_frontbuffer hook for DRI2: copies `surf` onto the context's dri2_front surface. */
+static void
+vl_dri2_flush_frontbuffer(struct pipe_screen *screen,
+                          struct pipe_surface *surf, void *context_private)
+{
+   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)context_private;
+   struct pipe_video_context *vpipe;
+
+   assert(screen);
+   assert(surf);
+   assert(context_private);
+   assert(vl_dri_ctx->dri2_front);
+
+   vpipe = vl_dri_ctx->base.vpipe;
+
+   /* XXX: Why not just render to fake front? */
+   vpipe->surface_copy(vpipe, vl_dri_ctx->dri2_front, 0, 0, surf, 0, 0, surf->width, surf->height);
+
+   /* TODO: nothing flushes after the copy; the st_flush() call below is commented out. */
+   //st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, fence);
+}
+
+
 Drawable
 vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable)
 {
@@ -281,9 +335,17 @@ vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable)
    if (vl_dri_ctx->drawable)
       old_drawable = vl_dri_ctx->drawable->x_drawable;
 
-   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
-   driCreateDrawable(vl_dri_scrn->dri_screen, drawable, &dri_drawable);
-   vl_dri_ctx->drawable = dri_drawable;
+   if (drawable != old_drawable) {
+      vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
+      if (vl_dri_scrn->dri2) {
+         /* XXX: Need dri2CreateDrawable()? */
+         vl_dri_ctx->dri2_front = vl_dri2_get_front(vl_dri_scrn, drawable);
+      }
+      else {
+         driCreateDrawable(vl_dri_scrn->dri_screen, drawable, &dri_drawable);
+         vl_dri_ctx->drawable = dri_drawable;
+      }
+   }
 
    return old_drawable;
 }
@@ -300,18 +362,39 @@ vl_screen_create(Display *display, int screen)
    if (!vl_dri_scrn)
       return NULL;
 
-   driCreateScreen(display, screen, &vl_dri_scrn->dri_screen, &vl_dri_scrn->dri_framebuf);
-   vl_dri_scrn->api = drm_api_create();
+   /* Try DRI2 first */
+   if (dri2CreateScreen(display, screen, &vl_dri_scrn->dri_screen)) {
+      /* If not, try DRI */
+      if (driCreateScreen(display, screen, &vl_dri_scrn->dri_screen, &vl_dri_scrn->dri_framebuf)) {
+         /* Now what? */
+         FREE(vl_dri_scrn);
+         return NULL;
+      }
+      else {
+         /* Got DRI */
+         arg.base.mode = DRM_CREATE_DRI1;
+         arg.lf = &dri1_lf;
+         arg.ddx_info = vl_dri_scrn->dri_framebuf.private;
+         arg.ddx_info_size = vl_dri_scrn->dri_framebuf.private_size;
+         arg.sarea = vl_dri_scrn->dri_screen->sarea;
+         vl_dri_copy_version(&arg.ddx_version, &vl_dri_scrn->dri_screen->ddx);
+         vl_dri_copy_version(&arg.dri_version, &vl_dri_scrn->dri_screen->dri);
+         vl_dri_copy_version(&arg.drm_version, &vl_dri_scrn->dri_screen->drm);
+         arg.api = NULL;
+         vl_dri_scrn->dri2 = FALSE;
+      }
+   }
+   else {
+      /* Got DRI2 */
+      arg.base.mode = DRM_CREATE_NORMAL;
+      vl_dri_scrn->dri2 = TRUE;
+   }
 
-   arg.base.mode = DRM_CREATE_DRI1;
-   arg.lf = &dri1_lf;
-   arg.ddx_info = vl_dri_scrn->dri_framebuf.private;
-   arg.ddx_info_size = vl_dri_scrn->dri_framebuf.private_size;
-   arg.sarea = vl_dri_scrn->dri_screen->sarea;
-   vl_dri_copy_version(&arg.ddx_version, &vl_dri_scrn->dri_screen->ddx);
-   vl_dri_copy_version(&arg.dri_version, &vl_dri_scrn->dri_screen->dri);
-   vl_dri_copy_version(&arg.drm_version, &vl_dri_scrn->dri_screen->drm);
-   arg.api = NULL;
+   vl_dri_scrn->api = drm_api_create();
+   if (!vl_dri_scrn->api) {
+      FREE(vl_dri_scrn);
+      return NULL;
+   }
 
    vl_dri_scrn->base.pscreen = vl_dri_scrn->api->create_screen(vl_dri_scrn->api,
                                                                vl_dri_scrn->dri_screen->fd,
@@ -322,11 +405,14 @@ vl_screen_create(Display *display, int screen)
       return NULL;
    }
 
-   vl_dri_scrn->visual = XDefaultVisual(display, screen);
-   vl_dri_scrn->api_hooks = arg.api;
-   vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri_flush_frontbuffer;
-   /* XXX: Safe to call this while unlocked? */
-   vl_dri_scrn->base.format = vl_dri_scrn->api_hooks->front_srf_locked(vl_dri_scrn->base.pscreen)->format;
+   if (!vl_dri_scrn->dri2) {
+      vl_dri_scrn->visual = XDefaultVisual(display, screen);
+      vl_dri_scrn->api_hooks = arg.api;
+      vl_dri_scrn->base.format = vl_dri_scrn->api_hooks->front_srf_locked(vl_dri_scrn->base.pscreen)->format;
+      vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri_flush_frontbuffer;
+   }
+   else
+      vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri2_flush_frontbuffer;
 
    return &vl_dri_scrn->base;
 }
@@ -338,7 +424,10 @@ void vl_screen_destroy(struct vl_screen *vscreen)
    assert(vscreen);
 
    vl_dri_scrn->base.pscreen->destroy(vl_dri_scrn->base.pscreen);
-   driDestroyScreen(vl_dri_scrn->dri_screen);
+   if (vl_dri_scrn->dri2)
+      dri2DestroyScreen(vl_dri_scrn->dri_screen);
+   else
+      driDestroyScreen(vl_dri_scrn->dri_screen);
    FREE(vl_dri_scrn);
 }
 
@@ -356,7 +445,8 @@ vl_video_create(struct vl_screen *vscreen,
       return NULL;
 
    /* XXX: Is default visual correct/sufficient here? */
-   driCreateContext(vl_dri_scrn->dri_screen, vl_dri_scrn->visual, &vl_dri_ctx->dri_context);
+   if (!vl_dri_scrn->dri2)
+      driCreateContext(vl_dri_scrn->dri_screen, vl_dri_scrn->visual, &vl_dri_ctx->dri_context);
 
    if (!vscreen->pscreen->video_context_create) {
       debug_printf("[G3DVL] No video support found on %s/%s.\n",
@@ -379,7 +469,8 @@ vl_video_create(struct vl_screen *vscreen,
    vl_dri_ctx->base.vpipe->priv = vl_dri_ctx;
    vl_dri_ctx->base.vscreen = vscreen;
    vl_dri_ctx->fd = vl_dri_scrn->dri_screen->fd;
-   vl_dri_ctx->lock = (drmLock*)&vl_dri_scrn->dri_screen->sarea->lock;
+   if (!vl_dri_scrn->dri2)
+      vl_dri_ctx->lock = (drmLock*)&vl_dri_scrn->dri_screen->sarea->lock;
 
    return &vl_dri_ctx->base;
 }
@@ -391,5 +482,7 @@ void vl_video_destroy(struct vl_context *vctx)
    assert(vctx);
 
    vl_dri_ctx->base.vpipe->destroy(vl_dri_ctx->base.vpipe);
+   if (!((struct vl_dri_screen *)vctx->vscreen)->dri2)
+      driDestroyContext(vl_dri_ctx->dri_context);
    FREE(vl_dri_ctx);
 }
diff --git a/src/gallium/winsys/g3dvl/dri/driclient.c b/src/gallium/winsys/g3dvl/dri/driclient.c
index dc2189afec3..7a2469c0d66 100644
--- a/src/gallium/winsys/g3dvl/dri/driclient.c
+++ b/src/gallium/winsys/g3dvl/dri/driclient.c
@@ -1,6 +1,9 @@
 #include "driclient.h"
 #include <assert.h>
 #include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <X11/Xlibint.h>
 
 int driCreateScreen(Display *display, int screen, dri_screen_t **dri_screen, dri_framebuffer_t *dri_framebuf)
 {
@@ -222,12 +224,12 @@ int driUpdateDrawableInfo(dri_drawable_t *dri_drawable)
 		dri_drawable->dri_screen->display,
 		dri_drawable->dri_screen->num,
 		dri_drawable->x_drawable,
-    		&dri_drawable->sarea_index,
-    		&dri_drawable->last_sarea_stamp,
-    		&dri_drawable->x,
-    		&dri_drawable->y,
-    		&dri_drawable->w,
-    		&dri_drawable->h,
+		&dri_drawable->sarea_index,
+		&dri_drawable->last_sarea_stamp,
+		&dri_drawable->x,
+		&dri_drawable->y,
+		&dri_drawable->w,
+		&dri_drawable->h,
 		&dri_drawable->num_cliprects,
 		&dri_drawable->cliprects,
 		&dri_drawable->back_x,
@@ -308,3 +310,60 @@ int driDestroyContext(dri_context_t *dri_context)
 
 	return 0;
 }
+
+/* Connect to DRI2 on `screen`, then open and authenticate its DRM device; 0 on success, 1 on failure. */
+int dri2CreateScreen(Display *display, int screen, dri_screen_t **dri_screen)
+{
+	dri_screen_t	*dri_scrn;
+	drm_magic_t	magic;
+	char		*drvName;
+	char		*devName;
+
+	dri_scrn = calloc(1, sizeof(dri_screen_t));
+	if (!dri_scrn)
+		return 1;
+
+	if (!DRI2Connect(display, XRootWindow(display, screen), &drvName, &devName))
+		goto free_screen;
+
+	dri_scrn->fd = open(devName, O_RDWR);
+	Xfree(drvName);
+	Xfree(devName);
+	if (dri_scrn->fd < 0)
+		goto free_screen;
+
+	/* The device is open from here on, so failures must also close it. */
+	if (drmGetMagic(dri_scrn->fd, &magic))
+		goto close_fd;
+	if (!DRI2Authenticate(display, RootWindow(display, screen), magic))
+		goto close_fd;
+
+	dri_scrn->display = display;
+	dri_scrn->num = screen;
+	*dri_screen = dri_scrn;
+	return 0;
+
+close_fd:
+	close(dri_scrn->fd);
+free_screen:
+	free(dri_scrn);
+	return 1;
+}
+
+int dri2DestroyScreen(dri_screen_t *dri_screen)
+{
+	close(dri_screen->fd);	/* undo dri2CreateScreen(): the fd came from open() ... */
+	free(dri_screen);	/* ... and the screen itself from calloc() */
+	return 0;
+}
+int dri2CreateDrawable(dri_screen_t *dri_screen, XID drawable)
+{
+	DRI2CreateDrawable(dri_screen->display, drawable);	/* returns void, so no failure is reported */
+	return 0;
+}
+
+int dri2DestroyDrawable(dri_screen_t *dri_screen, XID drawable)
+{
+	DRI2DestroyDrawable(dri_screen->display, drawable);	/* returns void, so no failure is reported */
+	return 0;
+}
diff --git a/src/gallium/winsys/g3dvl/dri/driclient.h b/src/gallium/winsys/g3dvl/dri/driclient.h
index d3915250392..c71b6c2c831 100644
--- a/src/gallium/winsys/g3dvl/dri/driclient.h
+++ b/src/gallium/winsys/g3dvl/dri/driclient.h
@@ -2,9 +2,12 @@
 #define driclient_h
 
 #include <stdint.h>
+#include <X11/Xmd.h>
 #include <X11/Xlib.h>
 #include <drm_sarea.h>
+//#include <X11/extensions/dri2proto.h>
 #include "xf86dri.h"
+#include "dri2.h"
 
 /* TODO: Bring in DRI XML options */
 
@@ -93,5 +96,21 @@ do												\
 	}											\
 } while (0)
 
+int dri2CreateScreen(Display *display, int screen, dri_screen_t **dri_screen);
+int dri2DestroyScreen(dri_screen_t *dri_screen);
+int dri2CreateDrawable(dri_screen_t *dri_screen, XID drawable);
+int dri2DestroyDrawable(dri_screen_t *dri_screen, XID drawable);
+
+#define DRI_BUFFER_FRONT_LEFT		0
+#define DRI_BUFFER_BACK_LEFT		1
+#define DRI_BUFFER_FRONT_RIGHT		2
+#define DRI_BUFFER_BACK_RIGHT		3
+#define DRI_BUFFER_DEPTH		4
+#define DRI_BUFFER_STENCIL		5
+#define DRI_BUFFER_ACCUM		6
+#define DRI_BUFFER_FAKE_FRONT_LEFT	7
+#define DRI_BUFFER_FAKE_FRONT_RIGHT	8
+#define DRI_BUFFER_DEPTH_STENCIL	9  /**< Only available with DRI2 1.1 */
+
 #endif
 
-- 
cgit v1.2.3


From fa3f0c832cd464c40157f35a61d47a8a618f70d8 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 8 May 2010 20:10:04 -0400
Subject: gallium: Get rid of auxilary/util files that were removed in master.

---
 src/gallium/auxiliary/indices/u_indices_gen.c  | 5129 ------------------------
 src/gallium/auxiliary/indices/u_unfilled_gen.c |  992 -----
 2 files changed, 6121 deletions(-)
 delete mode 100644 src/gallium/auxiliary/indices/u_indices_gen.c
 delete mode 100644 src/gallium/auxiliary/indices/u_unfilled_gen.c

diff --git a/src/gallium/auxiliary/indices/u_indices_gen.c b/src/gallium/auxiliary/indices/u_indices_gen.c
deleted file mode 100644
index 3c981e5d7f4..00000000000
--- a/src/gallium/auxiliary/indices/u_indices_gen.c
+++ /dev/null
@@ -1,5129 +0,0 @@
-/* File automatically generated by indices.py */
-
-/*
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-
-/**
- * @file
- * Functions to translate and generate index lists
- */
-
-#include "indices/u_indices.h"
-#include "indices/u_indices_priv.h"
-#include "pipe/p_compiler.h"
-#include "util/u_debug.h"
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-
-
-static unsigned out_size_idx( unsigned index_size )
-{
-   switch (index_size) {
-   case 4: return OUT_UINT;
-   case 2: return OUT_USHORT;
-   default: assert(0); return OUT_USHORT;
-   }
-}
-
-static unsigned in_size_idx( unsigned index_size )
-{
-   switch (index_size) {
-   case 4: return IN_UINT;
-   case 2: return IN_USHORT;
-   case 1: return IN_UBYTE;
-   default: assert(0); return IN_UBYTE;
-   }
-}
-
-
-static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
-static u_generate_func  generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
-
-
-
-static void generate_points_ushort_first2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)(i);
-   }
-}
-static void generate_lines_ushort_first2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)(i);
-      (out+i)[1] = (ushort)(i+1);
-   }
-}
-static void generate_linestrip_ushort_first2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)(i);
-      (out+j)[1] = (ushort)(i+1);
-   }
-}
-static void generate_lineloop_ushort_first2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)(i);
-      (out+j)[1] = (ushort)(i+1);
-   }
-      (out+j)[0] = (ushort)(i);
-      (out+j)[1] = (ushort)(0);
-}
-static void generate_tris_ushort_first2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)(i);
-      (out+i)[1] = (ushort)(i+1);
-      (out+i)[2] = (ushort)(i+2);
-   }
-}
-static void generate_tristrip_ushort_first2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(i);
-      (out+j)[1] = (ushort)(i+1+(i&1));
-      (out+j)[2] = (ushort)(i+2-(i&1));
-   }
-}
-static void generate_trifan_ushort_first2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(0);
-      (out+j)[1] = (ushort)(i+1);
-      (out+j)[2] = (ushort)(i+2);
-   }
-}
-static void generate_quads_ushort_first2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)(i+0);
-      (out+j+0)[1] = (ushort)(i+1);
-      (out+j+0)[2] = (ushort)(i+3);
-      (out+j+3)[0] = (ushort)(i+1);
-      (out+j+3)[1] = (ushort)(i+2);
-      (out+j+3)[2] = (ushort)(i+3);
-   }
-}
-static void generate_quadstrip_ushort_first2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)(i+2);
-      (out+j+0)[1] = (ushort)(i+0);
-      (out+j+0)[2] = (ushort)(i+3);
-      (out+j+3)[0] = (ushort)(i+0);
-      (out+j+3)[1] = (ushort)(i+1);
-      (out+j+3)[2] = (ushort)(i+3);
-   }
-}
-static void generate_polygon_ushort_first2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(0);
-      (out+j)[1] = (ushort)(i+1);
-      (out+j)[2] = (ushort)(i+2);
-   }
-}
-static void generate_points_ushort_first2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)(i);
-   }
-}
-static void generate_lines_ushort_first2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)(i+1);
-      (out+i)[1] = (ushort)(i);
-   }
-}
-static void generate_linestrip_ushort_first2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)(i+1);
-      (out+j)[1] = (ushort)(i);
-   }
-}
-static void generate_lineloop_ushort_first2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)(i+1);
-      (out+j)[1] = (ushort)(i);
-   }
-      (out+j)[0] = (ushort)(0);
-      (out+j)[1] = (ushort)(i);
-}
-static void generate_tris_ushort_first2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)(i+1);
-      (out+i)[1] = (ushort)(i+2);
-      (out+i)[2] = (ushort)(i);
-   }
-}
-static void generate_tristrip_ushort_first2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(i+1+(i&1));
-      (out+j)[1] = (ushort)(i+2-(i&1));
-      (out+j)[2] = (ushort)(i);
-   }
-}
-static void generate_trifan_ushort_first2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(i+1);
-      (out+j)[1] = (ushort)(i+2);
-      (out+j)[2] = (ushort)(0);
-   }
-}
-static void generate_quads_ushort_first2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)(i+1);
-      (out+j+0)[1] = (ushort)(i+3);
-      (out+j+0)[2] = (ushort)(i+0);
-      (out+j+3)[0] = (ushort)(i+2);
-      (out+j+3)[1] = (ushort)(i+3);
-      (out+j+3)[2] = (ushort)(i+1);
-   }
-}
-static void generate_quadstrip_ushort_first2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)(i+0);
-      (out+j+0)[1] = (ushort)(i+3);
-      (out+j+0)[2] = (ushort)(i+2);
-      (out+j+3)[0] = (ushort)(i+1);
-      (out+j+3)[1] = (ushort)(i+3);
-      (out+j+3)[2] = (ushort)(i+0);
-   }
-}
-static void generate_polygon_ushort_first2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(i+1);
-      (out+j)[1] = (ushort)(i+2);
-      (out+j)[2] = (ushort)(0);
-   }
-}
-static void generate_points_ushort_last2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)(i);
-   }
-}
-static void generate_lines_ushort_last2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)(i+1);
-      (out+i)[1] = (ushort)(i);
-   }
-}
-static void generate_linestrip_ushort_last2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)(i+1);
-      (out+j)[1] = (ushort)(i);
-   }
-}
-static void generate_lineloop_ushort_last2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)(i+1);
-      (out+j)[1] = (ushort)(i);
-   }
-      (out+j)[0] = (ushort)(0);
-      (out+j)[1] = (ushort)(i);
-}
-static void generate_tris_ushort_last2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)(i+2);
-      (out+i)[1] = (ushort)(i);
-      (out+i)[2] = (ushort)(i+1);
-   }
-}
-static void generate_tristrip_ushort_last2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(i+2);
-      (out+j)[1] = (ushort)(i+(i&1));
-      (out+j)[2] = (ushort)(i+1-(i&1));
-   }
-}
-static void generate_trifan_ushort_last2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(i+2);
-      (out+j)[1] = (ushort)(0);
-      (out+j)[2] = (ushort)(i+1);
-   }
-}
-static void generate_quads_ushort_last2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)(i+3);
-      (out+j+0)[1] = (ushort)(i+0);
-      (out+j+0)[2] = (ushort)(i+1);
-      (out+j+3)[0] = (ushort)(i+3);
-      (out+j+3)[1] = (ushort)(i+1);
-      (out+j+3)[2] = (ushort)(i+2);
-   }
-}
-static void generate_quadstrip_ushort_last2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)(i+3);
-      (out+j+0)[1] = (ushort)(i+2);
-      (out+j+0)[2] = (ushort)(i+0);
-      (out+j+3)[0] = (ushort)(i+3);
-      (out+j+3)[1] = (ushort)(i+0);
-      (out+j+3)[2] = (ushort)(i+1);
-   }
-}
-static void generate_polygon_ushort_last2first(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(0);
-      (out+j)[1] = (ushort)(i+1);
-      (out+j)[2] = (ushort)(i+2);
-   }
-}
-static void generate_points_ushort_last2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)(i);
-   }
-}
-static void generate_lines_ushort_last2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)(i);
-      (out+i)[1] = (ushort)(i+1);
-   }
-}
-static void generate_linestrip_ushort_last2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)(i);
-      (out+j)[1] = (ushort)(i+1);
-   }
-}
-static void generate_lineloop_ushort_last2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)(i);
-      (out+j)[1] = (ushort)(i+1);
-   }
-      (out+j)[0] = (ushort)(i);
-      (out+j)[1] = (ushort)(0);
-}
-static void generate_tris_ushort_last2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)(i);
-      (out+i)[1] = (ushort)(i+1);
-      (out+i)[2] = (ushort)(i+2);
-   }
-}
-static void generate_tristrip_ushort_last2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(i+(i&1));
-      (out+j)[1] = (ushort)(i+1-(i&1));
-      (out+j)[2] = (ushort)(i+2);
-   }
-}
-static void generate_trifan_ushort_last2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(0);
-      (out+j)[1] = (ushort)(i+1);
-      (out+j)[2] = (ushort)(i+2);
-   }
-}
-static void generate_quads_ushort_last2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)(i+0);
-      (out+j+0)[1] = (ushort)(i+1);
-      (out+j+0)[2] = (ushort)(i+3);
-      (out+j+3)[0] = (ushort)(i+1);
-      (out+j+3)[1] = (ushort)(i+2);
-      (out+j+3)[2] = (ushort)(i+3);
-   }
-}
-static void generate_quadstrip_ushort_last2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)(i+2);
-      (out+j+0)[1] = (ushort)(i+0);
-      (out+j+0)[2] = (ushort)(i+3);
-      (out+j+3)[0] = (ushort)(i+0);
-      (out+j+3)[1] = (ushort)(i+1);
-      (out+j+3)[2] = (ushort)(i+3);
-   }
-}
-static void generate_polygon_ushort_last2last(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)(i+1);
-      (out+j)[1] = (ushort)(i+2);
-      (out+j)[2] = (ushort)(0);
-   }
-}
-static void generate_points_uint_first2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)(i);
-   }
-}
-static void generate_lines_uint_first2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)(i);
-      (out+i)[1] = (uint)(i+1);
-   }
-}
-static void generate_linestrip_uint_first2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)(i);
-      (out+j)[1] = (uint)(i+1);
-   }
-}
-static void generate_lineloop_uint_first2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)(i);
-      (out+j)[1] = (uint)(i+1);
-   }
-      (out+j)[0] = (uint)(i);
-      (out+j)[1] = (uint)(0);
-}
-static void generate_tris_uint_first2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)(i);
-      (out+i)[1] = (uint)(i+1);
-      (out+i)[2] = (uint)(i+2);
-   }
-}
-static void generate_tristrip_uint_first2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(i);
-      (out+j)[1] = (uint)(i+1+(i&1));
-      (out+j)[2] = (uint)(i+2-(i&1));
-   }
-}
-static void generate_trifan_uint_first2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(0);
-      (out+j)[1] = (uint)(i+1);
-      (out+j)[2] = (uint)(i+2);
-   }
-}
-static void generate_quads_uint_first2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)(i+0);
-      (out+j+0)[1] = (uint)(i+1);
-      (out+j+0)[2] = (uint)(i+3);
-      (out+j+3)[0] = (uint)(i+1);
-      (out+j+3)[1] = (uint)(i+2);
-      (out+j+3)[2] = (uint)(i+3);
-   }
-}
-static void generate_quadstrip_uint_first2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)(i+2);
-      (out+j+0)[1] = (uint)(i+0);
-      (out+j+0)[2] = (uint)(i+3);
-      (out+j+3)[0] = (uint)(i+0);
-      (out+j+3)[1] = (uint)(i+1);
-      (out+j+3)[2] = (uint)(i+3);
-   }
-}
-static void generate_polygon_uint_first2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(0);
-      (out+j)[1] = (uint)(i+1);
-      (out+j)[2] = (uint)(i+2);
-   }
-}
-static void generate_points_uint_first2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)(i);
-   }
-}
-static void generate_lines_uint_first2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)(i+1);
-      (out+i)[1] = (uint)(i);
-   }
-}
-static void generate_linestrip_uint_first2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)(i+1);
-      (out+j)[1] = (uint)(i);
-   }
-}
-static void generate_lineloop_uint_first2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)(i+1);
-      (out+j)[1] = (uint)(i);
-   }
-      (out+j)[0] = (uint)(0);
-      (out+j)[1] = (uint)(i);
-}
-static void generate_tris_uint_first2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)(i+1);
-      (out+i)[1] = (uint)(i+2);
-      (out+i)[2] = (uint)(i);
-   }
-}
-static void generate_tristrip_uint_first2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(i+1+(i&1));
-      (out+j)[1] = (uint)(i+2-(i&1));
-      (out+j)[2] = (uint)(i);
-   }
-}
-static void generate_trifan_uint_first2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(i+1);
-      (out+j)[1] = (uint)(i+2);
-      (out+j)[2] = (uint)(0);
-   }
-}
-static void generate_quads_uint_first2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)(i+1);
-      (out+j+0)[1] = (uint)(i+3);
-      (out+j+0)[2] = (uint)(i+0);
-      (out+j+3)[0] = (uint)(i+2);
-      (out+j+3)[1] = (uint)(i+3);
-      (out+j+3)[2] = (uint)(i+1);
-   }
-}
-static void generate_quadstrip_uint_first2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)(i+0);
-      (out+j+0)[1] = (uint)(i+3);
-      (out+j+0)[2] = (uint)(i+2);
-      (out+j+3)[0] = (uint)(i+1);
-      (out+j+3)[1] = (uint)(i+3);
-      (out+j+3)[2] = (uint)(i+0);
-   }
-}
-static void generate_polygon_uint_first2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(i+1);
-      (out+j)[1] = (uint)(i+2);
-      (out+j)[2] = (uint)(0);
-   }
-}
-static void generate_points_uint_last2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)(i);
-   }
-}
-static void generate_lines_uint_last2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)(i+1);
-      (out+i)[1] = (uint)(i);
-   }
-}
-static void generate_linestrip_uint_last2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)(i+1);
-      (out+j)[1] = (uint)(i);
-   }
-}
-static void generate_lineloop_uint_last2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)(i+1);
-      (out+j)[1] = (uint)(i);
-   }
-      (out+j)[0] = (uint)(0);
-      (out+j)[1] = (uint)(i);
-}
-static void generate_tris_uint_last2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)(i+2);
-      (out+i)[1] = (uint)(i);
-      (out+i)[2] = (uint)(i+1);
-   }
-}
-static void generate_tristrip_uint_last2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(i+2);
-      (out+j)[1] = (uint)(i+(i&1));
-      (out+j)[2] = (uint)(i+1-(i&1));
-   }
-}
-static void generate_trifan_uint_last2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(i+2);
-      (out+j)[1] = (uint)(0);
-      (out+j)[2] = (uint)(i+1);
-   }
-}
-static void generate_quads_uint_last2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)(i+3);
-      (out+j+0)[1] = (uint)(i+0);
-      (out+j+0)[2] = (uint)(i+1);
-      (out+j+3)[0] = (uint)(i+3);
-      (out+j+3)[1] = (uint)(i+1);
-      (out+j+3)[2] = (uint)(i+2);
-   }
-}
-static void generate_quadstrip_uint_last2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)(i+3);
-      (out+j+0)[1] = (uint)(i+2);
-      (out+j+0)[2] = (uint)(i+0);
-      (out+j+3)[0] = (uint)(i+3);
-      (out+j+3)[1] = (uint)(i+0);
-      (out+j+3)[2] = (uint)(i+1);
-   }
-}
-static void generate_polygon_uint_last2first(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(0);
-      (out+j)[1] = (uint)(i+1);
-      (out+j)[2] = (uint)(i+2);
-   }
-}
-static void generate_points_uint_last2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)(i);
-   }
-}
-static void generate_lines_uint_last2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)(i);
-      (out+i)[1] = (uint)(i+1);
-   }
-}
-static void generate_linestrip_uint_last2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)(i);
-      (out+j)[1] = (uint)(i+1);
-   }
-}
-static void generate_lineloop_uint_last2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)(i);
-      (out+j)[1] = (uint)(i+1);
-   }
-      (out+j)[0] = (uint)(i);
-      (out+j)[1] = (uint)(0);
-}
-static void generate_tris_uint_last2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)(i);
-      (out+i)[1] = (uint)(i+1);
-      (out+i)[2] = (uint)(i+2);
-   }
-}
-static void generate_tristrip_uint_last2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(i+(i&1));
-      (out+j)[1] = (uint)(i+1-(i&1));
-      (out+j)[2] = (uint)(i+2);
-   }
-}
-static void generate_trifan_uint_last2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(0);
-      (out+j)[1] = (uint)(i+1);
-      (out+j)[2] = (uint)(i+2);
-   }
-}
-static void generate_quads_uint_last2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)(i+0);
-      (out+j+0)[1] = (uint)(i+1);
-      (out+j+0)[2] = (uint)(i+3);
-      (out+j+3)[0] = (uint)(i+1);
-      (out+j+3)[1] = (uint)(i+2);
-      (out+j+3)[2] = (uint)(i+3);
-   }
-}
-static void generate_quadstrip_uint_last2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)(i+2);
-      (out+j+0)[1] = (uint)(i+0);
-      (out+j+0)[2] = (uint)(i+3);
-      (out+j+3)[0] = (uint)(i+0);
-      (out+j+3)[1] = (uint)(i+1);
-      (out+j+3)[2] = (uint)(i+3);
-   }
-}
-static void generate_polygon_uint_last2last(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)(i+1);
-      (out+j)[1] = (uint)(i+2);
-      (out+j)[2] = (uint)(0);
-   }
-}
-static void translate_points_ubyte2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_ubyte2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_linestrip_ubyte2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_lineloop_ubyte2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[0];
-}
-static void translate_tris_ubyte2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-      (out+i)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_tristrip_ubyte2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1+(i&1)];
-      (out+j)[2] = (ushort)in[i+2-(i&1)];
-   }
-}
-static void translate_trifan_ubyte2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_quads_ubyte2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+0];
-      (out+j+0)[1] = (ushort)in[i+1];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+1];
-      (out+j+3)[1] = (ushort)in[i+2];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_quadstrip_ubyte2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+2];
-      (out+j+0)[1] = (ushort)in[i+0];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+0];
-      (out+j+3)[1] = (ushort)in[i+1];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_polygon_ubyte2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_points_ubyte2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_ubyte2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i+1];
-      (out+i)[1] = (ushort)in[i];
-   }
-}
-static void translate_linestrip_ubyte2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-}
-static void translate_lineloop_ubyte2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i];
-}
-static void translate_tris_ubyte2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i+1];
-      (out+i)[1] = (ushort)in[i+2];
-      (out+i)[2] = (ushort)in[i];
-   }
-}
-static void translate_tristrip_ubyte2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1+(i&1)];
-      (out+j)[1] = (ushort)in[i+2-(i&1)];
-      (out+j)[2] = (ushort)in[i];
-   }
-}
-static void translate_trifan_ubyte2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i+2];
-      (out+j)[2] = (ushort)in[0];
-   }
-}
-static void translate_quads_ubyte2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+1];
-      (out+j+0)[1] = (ushort)in[i+3];
-      (out+j+0)[2] = (ushort)in[i+0];
-      (out+j+3)[0] = (ushort)in[i+2];
-      (out+j+3)[1] = (ushort)in[i+3];
-      (out+j+3)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_quadstrip_ubyte2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+0];
-      (out+j+0)[1] = (ushort)in[i+3];
-      (out+j+0)[2] = (ushort)in[i+2];
-      (out+j+3)[0] = (ushort)in[i+1];
-      (out+j+3)[1] = (ushort)in[i+3];
-      (out+j+3)[2] = (ushort)in[i+0];
-   }
-}
-static void translate_polygon_ubyte2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i+2];
-      (out+j)[2] = (ushort)in[0];
-   }
-}
-static void translate_points_ubyte2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_ubyte2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i+1];
-      (out+i)[1] = (ushort)in[i];
-   }
-}
-static void translate_linestrip_ubyte2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-}
-static void translate_lineloop_ubyte2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i];
-}
-static void translate_tris_ubyte2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i+2];
-      (out+i)[1] = (ushort)in[i];
-      (out+i)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_tristrip_ubyte2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+2];
-      (out+j)[1] = (ushort)in[i+(i&1)];
-      (out+j)[2] = (ushort)in[i+1-(i&1)];
-   }
-}
-static void translate_trifan_ubyte2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+2];
-      (out+j)[1] = (ushort)in[0];
-      (out+j)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_quads_ubyte2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+3];
-      (out+j+0)[1] = (ushort)in[i+0];
-      (out+j+0)[2] = (ushort)in[i+1];
-      (out+j+3)[0] = (ushort)in[i+3];
-      (out+j+3)[1] = (ushort)in[i+1];
-      (out+j+3)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_quadstrip_ubyte2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+3];
-      (out+j+0)[1] = (ushort)in[i+2];
-      (out+j+0)[2] = (ushort)in[i+0];
-      (out+j+3)[0] = (ushort)in[i+3];
-      (out+j+3)[1] = (ushort)in[i+0];
-      (out+j+3)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_polygon_ubyte2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_points_ubyte2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_ubyte2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_linestrip_ubyte2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_lineloop_ubyte2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[0];
-}
-static void translate_tris_ubyte2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-      (out+i)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_tristrip_ubyte2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+(i&1)];
-      (out+j)[1] = (ushort)in[i+1-(i&1)];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_trifan_ubyte2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_quads_ubyte2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+0];
-      (out+j+0)[1] = (ushort)in[i+1];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+1];
-      (out+j+3)[1] = (ushort)in[i+2];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_quadstrip_ubyte2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+2];
-      (out+j+0)[1] = (ushort)in[i+0];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+0];
-      (out+j+3)[1] = (ushort)in[i+1];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_polygon_ubyte2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i+2];
-      (out+j)[2] = (ushort)in[0];
-   }
-}
-static void translate_points_ubyte2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_ubyte2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-   }
-}
-static void translate_linestrip_ubyte2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-}
-static void translate_lineloop_ubyte2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[0];
-}
-static void translate_tris_ubyte2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-      (out+i)[2] = (uint)in[i+2];
-   }
-}
-static void translate_tristrip_ubyte2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1+(i&1)];
-      (out+j)[2] = (uint)in[i+2-(i&1)];
-   }
-}
-static void translate_trifan_ubyte2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_quads_ubyte2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+0];
-      (out+j+0)[1] = (uint)in[i+1];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+1];
-      (out+j+3)[1] = (uint)in[i+2];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_quadstrip_ubyte2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+2];
-      (out+j+0)[1] = (uint)in[i+0];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+0];
-      (out+j+3)[1] = (uint)in[i+1];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_polygon_ubyte2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_points_ubyte2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_ubyte2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i+1];
-      (out+i)[1] = (uint)in[i];
-   }
-}
-static void translate_linestrip_ubyte2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-}
-static void translate_lineloop_ubyte2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i];
-}
-static void translate_tris_ubyte2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i+1];
-      (out+i)[1] = (uint)in[i+2];
-      (out+i)[2] = (uint)in[i];
-   }
-}
-static void translate_tristrip_ubyte2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1+(i&1)];
-      (out+j)[1] = (uint)in[i+2-(i&1)];
-      (out+j)[2] = (uint)in[i];
-   }
-}
-static void translate_trifan_ubyte2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i+2];
-      (out+j)[2] = (uint)in[0];
-   }
-}
-static void translate_quads_ubyte2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+1];
-      (out+j+0)[1] = (uint)in[i+3];
-      (out+j+0)[2] = (uint)in[i+0];
-      (out+j+3)[0] = (uint)in[i+2];
-      (out+j+3)[1] = (uint)in[i+3];
-      (out+j+3)[2] = (uint)in[i+1];
-   }
-}
-static void translate_quadstrip_ubyte2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+0];
-      (out+j+0)[1] = (uint)in[i+3];
-      (out+j+0)[2] = (uint)in[i+2];
-      (out+j+3)[0] = (uint)in[i+1];
-      (out+j+3)[1] = (uint)in[i+3];
-      (out+j+3)[2] = (uint)in[i+0];
-   }
-}
-static void translate_polygon_ubyte2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i+2];
-      (out+j)[2] = (uint)in[0];
-   }
-}
-static void translate_points_ubyte2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_ubyte2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i+1];
-      (out+i)[1] = (uint)in[i];
-   }
-}
-static void translate_linestrip_ubyte2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-}
-static void translate_lineloop_ubyte2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i];
-}
-static void translate_tris_ubyte2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i+2];
-      (out+i)[1] = (uint)in[i];
-      (out+i)[2] = (uint)in[i+1];
-   }
-}
-static void translate_tristrip_ubyte2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+2];
-      (out+j)[1] = (uint)in[i+(i&1)];
-      (out+j)[2] = (uint)in[i+1-(i&1)];
-   }
-}
-static void translate_trifan_ubyte2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+2];
-      (out+j)[1] = (uint)in[0];
-      (out+j)[2] = (uint)in[i+1];
-   }
-}
-static void translate_quads_ubyte2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+3];
-      (out+j+0)[1] = (uint)in[i+0];
-      (out+j+0)[2] = (uint)in[i+1];
-      (out+j+3)[0] = (uint)in[i+3];
-      (out+j+3)[1] = (uint)in[i+1];
-      (out+j+3)[2] = (uint)in[i+2];
-   }
-}
-static void translate_quadstrip_ubyte2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+3];
-      (out+j+0)[1] = (uint)in[i+2];
-      (out+j+0)[2] = (uint)in[i+0];
-      (out+j+3)[0] = (uint)in[i+3];
-      (out+j+3)[1] = (uint)in[i+0];
-      (out+j+3)[2] = (uint)in[i+1];
-   }
-}
-static void translate_polygon_ubyte2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_points_ubyte2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_ubyte2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-   }
-}
-static void translate_linestrip_ubyte2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-}
-static void translate_lineloop_ubyte2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[0];
-}
-static void translate_tris_ubyte2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-      (out+i)[2] = (uint)in[i+2];
-   }
-}
-static void translate_tristrip_ubyte2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+(i&1)];
-      (out+j)[1] = (uint)in[i+1-(i&1)];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_trifan_ubyte2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_quads_ubyte2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+0];
-      (out+j+0)[1] = (uint)in[i+1];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+1];
-      (out+j+3)[1] = (uint)in[i+2];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_quadstrip_ubyte2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+2];
-      (out+j+0)[1] = (uint)in[i+0];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+0];
-      (out+j+3)[1] = (uint)in[i+1];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_polygon_ubyte2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i+2];
-      (out+j)[2] = (uint)in[0];
-   }
-}
-static void translate_points_ushort2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_ushort2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_linestrip_ushort2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_lineloop_ushort2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[0];
-}
-static void translate_tris_ushort2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-      (out+i)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_tristrip_ushort2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1+(i&1)];
-      (out+j)[2] = (ushort)in[i+2-(i&1)];
-   }
-}
-static void translate_trifan_ushort2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_quads_ushort2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+0];
-      (out+j+0)[1] = (ushort)in[i+1];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+1];
-      (out+j+3)[1] = (ushort)in[i+2];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_quadstrip_ushort2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+2];
-      (out+j+0)[1] = (ushort)in[i+0];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+0];
-      (out+j+3)[1] = (ushort)in[i+1];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_polygon_ushort2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_points_ushort2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_ushort2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i+1];
-      (out+i)[1] = (ushort)in[i];
-   }
-}
-static void translate_linestrip_ushort2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-}
-static void translate_lineloop_ushort2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i];
-}
-static void translate_tris_ushort2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i+1];
-      (out+i)[1] = (ushort)in[i+2];
-      (out+i)[2] = (ushort)in[i];
-   }
-}
-static void translate_tristrip_ushort2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1+(i&1)];
-      (out+j)[1] = (ushort)in[i+2-(i&1)];
-      (out+j)[2] = (ushort)in[i];
-   }
-}
-static void translate_trifan_ushort2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i+2];
-      (out+j)[2] = (ushort)in[0];
-   }
-}
-static void translate_quads_ushort2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+1];
-      (out+j+0)[1] = (ushort)in[i+3];
-      (out+j+0)[2] = (ushort)in[i+0];
-      (out+j+3)[0] = (ushort)in[i+2];
-      (out+j+3)[1] = (ushort)in[i+3];
-      (out+j+3)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_quadstrip_ushort2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+0];
-      (out+j+0)[1] = (ushort)in[i+3];
-      (out+j+0)[2] = (ushort)in[i+2];
-      (out+j+3)[0] = (ushort)in[i+1];
-      (out+j+3)[1] = (ushort)in[i+3];
-      (out+j+3)[2] = (ushort)in[i+0];
-   }
-}
-static void translate_polygon_ushort2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i+2];
-      (out+j)[2] = (ushort)in[0];
-   }
-}
-static void translate_points_ushort2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_ushort2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i+1];
-      (out+i)[1] = (ushort)in[i];
-   }
-}
-static void translate_linestrip_ushort2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-}
-static void translate_lineloop_ushort2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i];
-}
-static void translate_tris_ushort2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i+2];
-      (out+i)[1] = (ushort)in[i];
-      (out+i)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_tristrip_ushort2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+2];
-      (out+j)[1] = (ushort)in[i+(i&1)];
-      (out+j)[2] = (ushort)in[i+1-(i&1)];
-   }
-}
-static void translate_trifan_ushort2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+2];
-      (out+j)[1] = (ushort)in[0];
-      (out+j)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_quads_ushort2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+3];
-      (out+j+0)[1] = (ushort)in[i+0];
-      (out+j+0)[2] = (ushort)in[i+1];
-      (out+j+3)[0] = (ushort)in[i+3];
-      (out+j+3)[1] = (ushort)in[i+1];
-      (out+j+3)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_quadstrip_ushort2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+3];
-      (out+j+0)[1] = (ushort)in[i+2];
-      (out+j+0)[2] = (ushort)in[i+0];
-      (out+j+3)[0] = (ushort)in[i+3];
-      (out+j+3)[1] = (ushort)in[i+0];
-      (out+j+3)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_polygon_ushort2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_points_ushort2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_ushort2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_linestrip_ushort2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_lineloop_ushort2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[0];
-}
-static void translate_tris_ushort2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-      (out+i)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_tristrip_ushort2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+(i&1)];
-      (out+j)[1] = (ushort)in[i+1-(i&1)];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_trifan_ushort2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_quads_ushort2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+0];
-      (out+j+0)[1] = (ushort)in[i+1];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+1];
-      (out+j+3)[1] = (ushort)in[i+2];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_quadstrip_ushort2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+2];
-      (out+j+0)[1] = (ushort)in[i+0];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+0];
-      (out+j+3)[1] = (ushort)in[i+1];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_polygon_ushort2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i+2];
-      (out+j)[2] = (ushort)in[0];
-   }
-}
-static void translate_points_ushort2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_ushort2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-   }
-}
-static void translate_linestrip_ushort2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-}
-static void translate_lineloop_ushort2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[0];
-}
-static void translate_tris_ushort2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-      (out+i)[2] = (uint)in[i+2];
-   }
-}
-static void translate_tristrip_ushort2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1+(i&1)];
-      (out+j)[2] = (uint)in[i+2-(i&1)];
-   }
-}
-static void translate_trifan_ushort2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_quads_ushort2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+0];
-      (out+j+0)[1] = (uint)in[i+1];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+1];
-      (out+j+3)[1] = (uint)in[i+2];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_quadstrip_ushort2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+2];
-      (out+j+0)[1] = (uint)in[i+0];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+0];
-      (out+j+3)[1] = (uint)in[i+1];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_polygon_ushort2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_points_ushort2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_ushort2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i+1];
-      (out+i)[1] = (uint)in[i];
-   }
-}
-static void translate_linestrip_ushort2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-}
-static void translate_lineloop_ushort2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i];
-}
-static void translate_tris_ushort2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i+1];
-      (out+i)[1] = (uint)in[i+2];
-      (out+i)[2] = (uint)in[i];
-   }
-}
-static void translate_tristrip_ushort2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1+(i&1)];
-      (out+j)[1] = (uint)in[i+2-(i&1)];
-      (out+j)[2] = (uint)in[i];
-   }
-}
-static void translate_trifan_ushort2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i+2];
-      (out+j)[2] = (uint)in[0];
-   }
-}
-static void translate_quads_ushort2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+1];
-      (out+j+0)[1] = (uint)in[i+3];
-      (out+j+0)[2] = (uint)in[i+0];
-      (out+j+3)[0] = (uint)in[i+2];
-      (out+j+3)[1] = (uint)in[i+3];
-      (out+j+3)[2] = (uint)in[i+1];
-   }
-}
-static void translate_quadstrip_ushort2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+0];
-      (out+j+0)[1] = (uint)in[i+3];
-      (out+j+0)[2] = (uint)in[i+2];
-      (out+j+3)[0] = (uint)in[i+1];
-      (out+j+3)[1] = (uint)in[i+3];
-      (out+j+3)[2] = (uint)in[i+0];
-   }
-}
-static void translate_polygon_ushort2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i+2];
-      (out+j)[2] = (uint)in[0];
-   }
-}
-static void translate_points_ushort2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_ushort2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i+1];
-      (out+i)[1] = (uint)in[i];
-   }
-}
-static void translate_linestrip_ushort2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-}
-static void translate_lineloop_ushort2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i];
-}
-static void translate_tris_ushort2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i+2];
-      (out+i)[1] = (uint)in[i];
-      (out+i)[2] = (uint)in[i+1];
-   }
-}
-static void translate_tristrip_ushort2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+2];
-      (out+j)[1] = (uint)in[i+(i&1)];
-      (out+j)[2] = (uint)in[i+1-(i&1)];
-   }
-}
-static void translate_trifan_ushort2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+2];
-      (out+j)[1] = (uint)in[0];
-      (out+j)[2] = (uint)in[i+1];
-   }
-}
-static void translate_quads_ushort2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+3];
-      (out+j+0)[1] = (uint)in[i+0];
-      (out+j+0)[2] = (uint)in[i+1];
-      (out+j+3)[0] = (uint)in[i+3];
-      (out+j+3)[1] = (uint)in[i+1];
-      (out+j+3)[2] = (uint)in[i+2];
-   }
-}
-static void translate_quadstrip_ushort2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+3];
-      (out+j+0)[1] = (uint)in[i+2];
-      (out+j+0)[2] = (uint)in[i+0];
-      (out+j+3)[0] = (uint)in[i+3];
-      (out+j+3)[1] = (uint)in[i+0];
-      (out+j+3)[2] = (uint)in[i+1];
-   }
-}
-static void translate_polygon_ushort2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_points_ushort2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_ushort2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-   }
-}
-static void translate_linestrip_ushort2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-}
-static void translate_lineloop_ushort2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[0];
-}
-static void translate_tris_ushort2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-      (out+i)[2] = (uint)in[i+2];
-   }
-}
-static void translate_tristrip_ushort2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+(i&1)];
-      (out+j)[1] = (uint)in[i+1-(i&1)];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_trifan_ushort2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_quads_ushort2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+0];
-      (out+j+0)[1] = (uint)in[i+1];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+1];
-      (out+j+3)[1] = (uint)in[i+2];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_quadstrip_ushort2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+2];
-      (out+j+0)[1] = (uint)in[i+0];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+0];
-      (out+j+3)[1] = (uint)in[i+1];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_polygon_ushort2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i+2];
-      (out+j)[2] = (uint)in[0];
-   }
-}
-static void translate_points_uint2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_uint2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_linestrip_uint2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_lineloop_uint2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[0];
-}
-static void translate_tris_uint2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-      (out+i)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_tristrip_uint2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1+(i&1)];
-      (out+j)[2] = (ushort)in[i+2-(i&1)];
-   }
-}
-static void translate_trifan_uint2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_quads_uint2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+0];
-      (out+j+0)[1] = (ushort)in[i+1];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+1];
-      (out+j+3)[1] = (ushort)in[i+2];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_quadstrip_uint2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+2];
-      (out+j+0)[1] = (ushort)in[i+0];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+0];
-      (out+j+3)[1] = (ushort)in[i+1];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_polygon_uint2ushort_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_points_uint2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_uint2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i+1];
-      (out+i)[1] = (ushort)in[i];
-   }
-}
-static void translate_linestrip_uint2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-}
-static void translate_lineloop_uint2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i];
-}
-static void translate_tris_uint2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i+1];
-      (out+i)[1] = (ushort)in[i+2];
-      (out+i)[2] = (ushort)in[i];
-   }
-}
-static void translate_tristrip_uint2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1+(i&1)];
-      (out+j)[1] = (ushort)in[i+2-(i&1)];
-      (out+j)[2] = (ushort)in[i];
-   }
-}
-static void translate_trifan_uint2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i+2];
-      (out+j)[2] = (ushort)in[0];
-   }
-}
-static void translate_quads_uint2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+1];
-      (out+j+0)[1] = (ushort)in[i+3];
-      (out+j+0)[2] = (ushort)in[i+0];
-      (out+j+3)[0] = (ushort)in[i+2];
-      (out+j+3)[1] = (ushort)in[i+3];
-      (out+j+3)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_quadstrip_uint2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+0];
-      (out+j+0)[1] = (ushort)in[i+3];
-      (out+j+0)[2] = (ushort)in[i+2];
-      (out+j+3)[0] = (ushort)in[i+1];
-      (out+j+3)[1] = (ushort)in[i+3];
-      (out+j+3)[2] = (ushort)in[i+0];
-   }
-}
-static void translate_polygon_uint2ushort_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i+2];
-      (out+j)[2] = (ushort)in[0];
-   }
-}
-static void translate_points_uint2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_uint2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i+1];
-      (out+i)[1] = (ushort)in[i];
-   }
-}
-static void translate_linestrip_uint2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-}
-static void translate_lineloop_uint2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i];
-   }
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i];
-}
-static void translate_tris_uint2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i+2];
-      (out+i)[1] = (ushort)in[i];
-      (out+i)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_tristrip_uint2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+2];
-      (out+j)[1] = (ushort)in[i+(i&1)];
-      (out+j)[2] = (ushort)in[i+1-(i&1)];
-   }
-}
-static void translate_trifan_uint2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+2];
-      (out+j)[1] = (ushort)in[0];
-      (out+j)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_quads_uint2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+3];
-      (out+j+0)[1] = (ushort)in[i+0];
-      (out+j+0)[2] = (ushort)in[i+1];
-      (out+j+3)[0] = (ushort)in[i+3];
-      (out+j+3)[1] = (ushort)in[i+1];
-      (out+j+3)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_quadstrip_uint2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+3];
-      (out+j+0)[1] = (ushort)in[i+2];
-      (out+j+0)[2] = (ushort)in[i+0];
-      (out+j+3)[0] = (ushort)in[i+3];
-      (out+j+3)[1] = (ushort)in[i+0];
-      (out+j+3)[2] = (ushort)in[i+1];
-   }
-}
-static void translate_polygon_uint2ushort_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_points_uint2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (ushort)in[i];
-   }
-}
-static void translate_lines_uint2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_linestrip_uint2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-}
-static void translate_lineloop_uint2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-   }
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[0];
-}
-static void translate_tris_uint2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (ushort)in[i];
-      (out+i)[1] = (ushort)in[i+1];
-      (out+i)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_tristrip_uint2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+(i&1)];
-      (out+j)[1] = (ushort)in[i+1-(i&1)];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_trifan_uint2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j)[2] = (ushort)in[i+2];
-   }
-}
-static void translate_quads_uint2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (ushort)in[i+0];
-      (out+j+0)[1] = (ushort)in[i+1];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+1];
-      (out+j+3)[1] = (ushort)in[i+2];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_quadstrip_uint2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (ushort)in[i+2];
-      (out+j+0)[1] = (ushort)in[i+0];
-      (out+j+0)[2] = (ushort)in[i+3];
-      (out+j+3)[0] = (ushort)in[i+0];
-      (out+j+3)[1] = (ushort)in[i+1];
-      (out+j+3)[2] = (ushort)in[i+3];
-   }
-}
-static void translate_polygon_uint2ushort_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (ushort)in[i+1];
-      (out+j)[1] = (ushort)in[i+2];
-      (out+j)[2] = (ushort)in[0];
-   }
-}
-static void translate_points_uint2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_uint2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-   }
-}
-static void translate_linestrip_uint2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-}
-static void translate_lineloop_uint2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[0];
-}
-static void translate_tris_uint2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-      (out+i)[2] = (uint)in[i+2];
-   }
-}
-static void translate_tristrip_uint2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1+(i&1)];
-      (out+j)[2] = (uint)in[i+2-(i&1)];
-   }
-}
-static void translate_trifan_uint2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_quads_uint2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+0];
-      (out+j+0)[1] = (uint)in[i+1];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+1];
-      (out+j+3)[1] = (uint)in[i+2];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_quadstrip_uint2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+2];
-      (out+j+0)[1] = (uint)in[i+0];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+0];
-      (out+j+3)[1] = (uint)in[i+1];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_polygon_uint2uint_first2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_points_uint2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_uint2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i+1];
-      (out+i)[1] = (uint)in[i];
-   }
-}
-static void translate_linestrip_uint2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-}
-static void translate_lineloop_uint2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i];
-}
-static void translate_tris_uint2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i+1];
-      (out+i)[1] = (uint)in[i+2];
-      (out+i)[2] = (uint)in[i];
-   }
-}
-static void translate_tristrip_uint2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1+(i&1)];
-      (out+j)[1] = (uint)in[i+2-(i&1)];
-      (out+j)[2] = (uint)in[i];
-   }
-}
-static void translate_trifan_uint2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i+2];
-      (out+j)[2] = (uint)in[0];
-   }
-}
-static void translate_quads_uint2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+1];
-      (out+j+0)[1] = (uint)in[i+3];
-      (out+j+0)[2] = (uint)in[i+0];
-      (out+j+3)[0] = (uint)in[i+2];
-      (out+j+3)[1] = (uint)in[i+3];
-      (out+j+3)[2] = (uint)in[i+1];
-   }
-}
-static void translate_quadstrip_uint2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+0];
-      (out+j+0)[1] = (uint)in[i+3];
-      (out+j+0)[2] = (uint)in[i+2];
-      (out+j+3)[0] = (uint)in[i+1];
-      (out+j+3)[1] = (uint)in[i+3];
-      (out+j+3)[2] = (uint)in[i+0];
-   }
-}
-static void translate_polygon_uint2uint_first2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i+2];
-      (out+j)[2] = (uint)in[0];
-   }
-}
-static void translate_points_uint2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_uint2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i+1];
-      (out+i)[1] = (uint)in[i];
-   }
-}
-static void translate_linestrip_uint2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-}
-static void translate_lineloop_uint2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i];
-   }
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i];
-}
-static void translate_tris_uint2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i+2];
-      (out+i)[1] = (uint)in[i];
-      (out+i)[2] = (uint)in[i+1];
-   }
-}
-static void translate_tristrip_uint2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+2];
-      (out+j)[1] = (uint)in[i+(i&1)];
-      (out+j)[2] = (uint)in[i+1-(i&1)];
-   }
-}
-static void translate_trifan_uint2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+2];
-      (out+j)[1] = (uint)in[0];
-      (out+j)[2] = (uint)in[i+1];
-   }
-}
-static void translate_quads_uint2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+3];
-      (out+j+0)[1] = (uint)in[i+0];
-      (out+j+0)[2] = (uint)in[i+1];
-      (out+j+3)[0] = (uint)in[i+3];
-      (out+j+3)[1] = (uint)in[i+1];
-      (out+j+3)[2] = (uint)in[i+2];
-   }
-}
-static void translate_quadstrip_uint2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+3];
-      (out+j+0)[1] = (uint)in[i+2];
-      (out+j+0)[2] = (uint)in[i+0];
-      (out+j+3)[0] = (uint)in[i+3];
-      (out+j+3)[1] = (uint)in[i+0];
-      (out+j+3)[2] = (uint)in[i+1];
-   }
-}
-static void translate_polygon_uint2uint_last2first(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_points_uint2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i++) { 
-      (out+i)[0] = (uint)in[i];
-   }
-}
-static void translate_lines_uint2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=2) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-   }
-}
-static void translate_linestrip_uint2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-}
-static void translate_lineloop_uint2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr - 2; j+=2, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-   }
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[0];
-}
-static void translate_tris_uint2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (i = 0; i < nr; i+=3) { 
-      (out+i)[0] = (uint)in[i];
-      (out+i)[1] = (uint)in[i+1];
-      (out+i)[2] = (uint)in[i+2];
-   }
-}
-static void translate_tristrip_uint2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+(i&1)];
-      (out+j)[1] = (uint)in[i+1-(i&1)];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_trifan_uint2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j)[2] = (uint)in[i+2];
-   }
-}
-static void translate_quads_uint2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=4) { 
-      (out+j+0)[0] = (uint)in[i+0];
-      (out+j+0)[1] = (uint)in[i+1];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+1];
-      (out+j+3)[1] = (uint)in[i+2];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_quadstrip_uint2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=2) { 
-      (out+j+0)[0] = (uint)in[i+2];
-      (out+j+0)[1] = (uint)in[i+0];
-      (out+j+0)[2] = (uint)in[i+3];
-      (out+j+3)[0] = (uint)in[i+0];
-      (out+j+3)[1] = (uint)in[i+1];
-      (out+j+3)[2] = (uint)in[i+3];
-   }
-}
-static void translate_polygon_uint2uint_last2last(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=3, i++) { 
-      (out+j)[0] = (uint)in[i+1];
-      (out+j)[1] = (uint)in[i+2];
-      (out+j)[2] = (uint)in[0];
-   }
-}
-void u_index_init( void )
-{
-  static int firsttime = 1;
-  if (!firsttime) return;
-  firsttime = 0;
-generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_ushort_first2first;
-generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_ushort_first2first;
-generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_first2first;
-generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_first2first;
-generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_first2first;
-generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_first2first;
-generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_first2first;
-generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_ushort_first2first;
-generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_first2first;
-generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_first2first;
-generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_ushort_first2last;
-generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_ushort_first2last;
-generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_first2last;
-generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_first2last;
-generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_first2last;
-generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_first2last;
-generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_first2last;
-generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_ushort_first2last;
-generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_first2last;
-generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_first2last;
-generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_ushort_last2first;
-generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_ushort_last2first;
-generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_last2first;
-generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_last2first;
-generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_last2first;
-generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_last2first;
-generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_last2first;
-generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_ushort_last2first;
-generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_last2first;
-generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_last2first;
-generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_ushort_last2last;
-generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_ushort_last2last;
-generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_last2last;
-generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_last2last;
-generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_last2last;
-generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_last2last;
-generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_last2last;
-generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_ushort_last2last;
-generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_last2last;
-generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_last2last;
-generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_uint_first2first;
-generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_uint_first2first;
-generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_first2first;
-generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_first2first;
-generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_first2first;
-generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_first2first;
-generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_first2first;
-generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_uint_first2first;
-generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_first2first;
-generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_uint_first2first;
-generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_uint_first2last;
-generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_uint_first2last;
-generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_first2last;
-generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_first2last;
-generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_first2last;
-generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_first2last;
-generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_first2last;
-generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_uint_first2last;
-generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_first2last;
-generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_uint_first2last;
-generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_uint_last2first;
-generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_uint_last2first;
-generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_last2first;
-generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_last2first;
-generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_last2first;
-generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_last2first;
-generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_last2first;
-generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_uint_last2first;
-generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_last2first;
-generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_uint_last2first;
-generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_uint_last2last;
-generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_uint_last2last;
-generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_last2last;
-generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_last2last;
-generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_last2last;
-generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_last2last;
-generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_last2last;
-generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_uint_last2last;
-generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_last2last;
-generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_uint_last2last;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_first2first;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_first2first;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_first2first;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_first2first;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_first2first;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_first2first;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_first2first;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_first2first;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_first2first;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_first2first;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_first2last;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_first2last;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_first2last;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_first2last;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_first2last;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_first2last;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_first2last;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_first2last;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_first2last;
-translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_first2last;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_last2first;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_last2first;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_last2first;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_last2first;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_last2first;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_last2first;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_last2first;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_last2first;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_last2first;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_last2first;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_last2last;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_last2last;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_last2last;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_last2last;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_last2last;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_last2last;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_last2last;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_last2last;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_last2last;
-translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_last2last;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_first2first;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_first2first;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_first2first;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_first2first;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_first2first;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_first2first;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_first2first;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_first2first;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_first2first;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_first2first;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_first2last;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_first2last;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_first2last;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_first2last;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_first2last;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_first2last;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_first2last;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_first2last;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_first2last;
-translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_first2last;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_last2first;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_last2first;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_last2first;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_last2first;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_last2first;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_last2first;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_last2first;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_last2first;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_last2first;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_last2first;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_last2last;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_last2last;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_last2last;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_last2last;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_last2last;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_last2last;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_last2last;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_last2last;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_last2last;
-translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_last2last;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_first2first;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_first2first;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_first2first;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_first2first;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_first2first;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_first2first;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_first2first;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_first2first;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_first2first;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_first2first;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_first2last;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_first2last;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_first2last;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_first2last;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_first2last;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_first2last;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_first2last;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_first2last;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_first2last;
-translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_first2last;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_last2first;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_last2first;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_last2first;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_last2first;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_last2first;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_last2first;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_last2first;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_last2first;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_last2first;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_last2first;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_last2last;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_last2last;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_last2last;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_last2last;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_last2last;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_last2last;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_last2last;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_last2last;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_last2last;
-translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_last2last;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_first2first;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_first2first;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_first2first;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_first2first;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_first2first;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_first2first;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_first2first;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_first2first;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_first2first;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_first2first;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_first2last;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_first2last;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_first2last;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_first2last;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_first2last;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_first2last;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_first2last;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_first2last;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_first2last;
-translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_first2last;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_last2first;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_last2first;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_last2first;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_last2first;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_last2first;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_last2first;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_last2first;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_last2first;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_last2first;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_last2first;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_last2last;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_last2last;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_last2last;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_last2last;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_last2last;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_last2last;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_last2last;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_last2last;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_last2last;
-translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_last2last;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_first2first;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_first2first;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_first2first;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_first2first;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_first2first;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_first2first;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_first2first;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_first2first;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_first2first;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_first2first;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_first2last;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_first2last;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_first2last;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_first2last;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_first2last;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_first2last;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_first2last;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_first2last;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_first2last;
-translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_first2last;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_last2first;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_last2first;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_last2first;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_last2first;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_last2first;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_last2first;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_last2first;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_last2first;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_last2first;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_last2first;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_last2last;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_last2last;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_last2last;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_last2last;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_last2last;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_last2last;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_last2last;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_last2last;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_last2last;
-translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_last2last;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2uint_first2first;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2uint_first2first;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_first2first;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_first2first;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_first2first;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_first2first;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_first2first;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_first2first;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_first2first;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_first2first;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2uint_first2last;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2uint_first2last;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_first2last;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_first2last;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_first2last;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_first2last;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_first2last;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_first2last;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_first2last;
-translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_first2last;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2uint_last2first;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2uint_last2first;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_last2first;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_last2first;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_last2first;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_last2first;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_last2first;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_last2first;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_last2first;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_last2first;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2uint_last2last;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2uint_last2last;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_last2last;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_last2last;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_last2last;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_last2last;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_last2last;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_last2last;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_last2last;
-translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_last2last;
-}
-#include "indices/u_indices.c"
diff --git a/src/gallium/auxiliary/indices/u_unfilled_gen.c b/src/gallium/auxiliary/indices/u_unfilled_gen.c
deleted file mode 100644
index 93897c98deb..00000000000
--- a/src/gallium/auxiliary/indices/u_unfilled_gen.c
+++ /dev/null
@@ -1,992 +0,0 @@
-/* File automatically generated by u_unfilled_gen.py */
-
-/*
- * Copyright 2009 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
- * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-
-
-/**
- * @file
- * Functions to translate and generate index lists
- */
-
-#include "indices/u_indices.h"
-#include "indices/u_indices_priv.h"
-#include "pipe/p_compiler.h"
-#include "util/u_debug.h"
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-
-
-static unsigned out_size_idx( unsigned index_size )
-{
-   switch (index_size) {
-   case 4: return OUT_UINT;
-   case 2: return OUT_USHORT;
-   default: assert(0); return OUT_USHORT;
-   }
-}
-
-static unsigned in_size_idx( unsigned index_size )
-{
-   switch (index_size) {
-   case 4: return IN_UINT;
-   case 2: return IN_USHORT;
-   case 1: return IN_UBYTE;
-   default: assert(0); return IN_UBYTE;
-   }
-}
-
-
-static u_generate_func generate_line[OUT_COUNT][PRIM_COUNT];
-static u_translate_func translate_line[IN_COUNT][OUT_COUNT][PRIM_COUNT];
-
-
-static void generate_tris_ushort(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=3) { 
-      (out+j)[0] = (ushort)(i);
-      (out+j)[1] = (ushort)(i+1);
-      (out+j+2)[0] = (ushort)(i+1);
-      (out+j+2)[1] = (ushort)(i+2);
-      (out+j+4)[0] = (ushort)(i+2);
-      (out+j+4)[1] = (ushort)(i);
-   }
-}
-static void generate_tristrip_ushort(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)(i);
-      (out+j)[1] = (ushort)(i+1/*+(i&1)*/);
-      (out+j+2)[0] = (ushort)(i+1/*+(i&1)*/);
-      (out+j+2)[1] = (ushort)(i+2/*-(i&1)*/);
-      (out+j+4)[0] = (ushort)(i+2/*-(i&1)*/);
-      (out+j+4)[1] = (ushort)(i);
-   }
-}
-static void generate_trifan_ushort(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)(0);
-      (out+j)[1] = (ushort)(i+1);
-      (out+j+2)[0] = (ushort)(i+1);
-      (out+j+2)[1] = (ushort)(i+2);
-      (out+j+4)[0] = (ushort)(i+2);
-      (out+j+4)[1] = (ushort)(0);
-   }
-}
-static void generate_quads_ushort(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=4) { 
-      (out+j)[0] = (ushort)(i+0);
-      (out+j)[1] = (ushort)(i+1);
-      (out+j+2)[0] = (ushort)(i+1);
-      (out+j+2)[1] = (ushort)(i+2);
-      (out+j+4)[0] = (ushort)(i+2);
-      (out+j+4)[1] = (ushort)(i+3);
-      (out+j+6)[0] = (ushort)(i+3);
-      (out+j+6)[1] = (ushort)(i+0);
-   }
-}
-static void generate_quadstrip_ushort(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=2) { 
-      (out+j)[0] = (ushort)(i+2);
-      (out+j)[1] = (ushort)(i+0);
-      (out+j+2)[0] = (ushort)(i+0);
-      (out+j+2)[1] = (ushort)(i+1);
-      (out+j+4)[0] = (ushort)(i+1);
-      (out+j+4)[1] = (ushort)(i+3);
-      (out+j+6)[0] = (ushort)(i+3);
-      (out+j+6)[1] = (ushort)(i+2);
-   }
-}
-static void generate_polygon_ushort(
-    unsigned nr,
-    void *_out )
-{
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)(0);
-      (out+j)[1] = (ushort)(i+1);
-      (out+j+2)[0] = (ushort)(i+1);
-      (out+j+2)[1] = (ushort)(i+2);
-      (out+j+4)[0] = (ushort)(i+2);
-      (out+j+4)[1] = (ushort)(0);
-   }
-}
-static void generate_tris_uint(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=3) { 
-      (out+j)[0] = (uint)(i);
-      (out+j)[1] = (uint)(i+1);
-      (out+j+2)[0] = (uint)(i+1);
-      (out+j+2)[1] = (uint)(i+2);
-      (out+j+4)[0] = (uint)(i+2);
-      (out+j+4)[1] = (uint)(i);
-   }
-}
-static void generate_tristrip_uint(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)(i);
-      (out+j)[1] = (uint)(i+1/*+(i&1)*/);
-      (out+j+2)[0] = (uint)(i+1/*+(i&1)*/);
-      (out+j+2)[1] = (uint)(i+2/*-(i&1)*/);
-      (out+j+4)[0] = (uint)(i+2/*-(i&1)*/);
-      (out+j+4)[1] = (uint)(i);
-   }
-}
-static void generate_trifan_uint(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)(0);
-      (out+j)[1] = (uint)(i+1);
-      (out+j+2)[0] = (uint)(i+1);
-      (out+j+2)[1] = (uint)(i+2);
-      (out+j+4)[0] = (uint)(i+2);
-      (out+j+4)[1] = (uint)(0);
-   }
-}
-static void generate_quads_uint(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=4) { 
-      (out+j)[0] = (uint)(i+0);
-      (out+j)[1] = (uint)(i+1);
-      (out+j+2)[0] = (uint)(i+1);
-      (out+j+2)[1] = (uint)(i+2);
-      (out+j+4)[0] = (uint)(i+2);
-      (out+j+4)[1] = (uint)(i+3);
-      (out+j+6)[0] = (uint)(i+3);
-      (out+j+6)[1] = (uint)(i+0);
-   }
-}
-static void generate_quadstrip_uint(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=2) { 
-      (out+j)[0] = (uint)(i+2);
-      (out+j)[1] = (uint)(i+0);
-      (out+j+2)[0] = (uint)(i+0);
-      (out+j+2)[1] = (uint)(i+1);
-      (out+j+4)[0] = (uint)(i+1);
-      (out+j+4)[1] = (uint)(i+3);
-      (out+j+6)[0] = (uint)(i+3);
-      (out+j+6)[1] = (uint)(i+2);
-   }
-}
-static void generate_polygon_uint(
-    unsigned nr,
-    void *_out )
-{
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)(0);
-      (out+j)[1] = (uint)(i+1);
-      (out+j+2)[0] = (uint)(i+1);
-      (out+j+2)[1] = (uint)(i+2);
-      (out+j+4)[0] = (uint)(i+2);
-      (out+j+4)[1] = (uint)(0);
-   }
-}
-static void translate_tris_ubyte2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=3) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[i];
-   }
-}
-static void translate_tristrip_ubyte2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1/*+(i&1)*/];
-      (out+j+2)[0] = (ushort)in[i+1/*+(i&1)*/];
-      (out+j+2)[1] = (ushort)in[i+2/*-(i&1)*/];
-      (out+j+4)[0] = (ushort)in[i+2/*-(i&1)*/];
-      (out+j+4)[1] = (ushort)in[i];
-   }
-}
-static void translate_trifan_ubyte2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[0];
-   }
-}
-static void translate_quads_ubyte2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=4) { 
-      (out+j)[0] = (ushort)in[i+0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[i+3];
-      (out+j+6)[0] = (ushort)in[i+3];
-      (out+j+6)[1] = (ushort)in[i+0];
-   }
-}
-static void translate_quadstrip_ubyte2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=2) { 
-      (out+j)[0] = (ushort)in[i+2];
-      (out+j)[1] = (ushort)in[i+0];
-      (out+j+2)[0] = (ushort)in[i+0];
-      (out+j+2)[1] = (ushort)in[i+1];
-      (out+j+4)[0] = (ushort)in[i+1];
-      (out+j+4)[1] = (ushort)in[i+3];
-      (out+j+6)[0] = (ushort)in[i+3];
-      (out+j+6)[1] = (ushort)in[i+2];
-   }
-}
-static void translate_polygon_ubyte2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[0];
-   }
-}
-static void translate_tris_ubyte2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=3) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[i];
-   }
-}
-static void translate_tristrip_ubyte2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1/*+(i&1)*/];
-      (out+j+2)[0] = (uint)in[i+1/*+(i&1)*/];
-      (out+j+2)[1] = (uint)in[i+2/*-(i&1)*/];
-      (out+j+4)[0] = (uint)in[i+2/*-(i&1)*/];
-      (out+j+4)[1] = (uint)in[i];
-   }
-}
-static void translate_trifan_ubyte2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[0];
-   }
-}
-static void translate_quads_ubyte2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=4) { 
-      (out+j)[0] = (uint)in[i+0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[i+3];
-      (out+j+6)[0] = (uint)in[i+3];
-      (out+j+6)[1] = (uint)in[i+0];
-   }
-}
-static void translate_quadstrip_ubyte2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=2) { 
-      (out+j)[0] = (uint)in[i+2];
-      (out+j)[1] = (uint)in[i+0];
-      (out+j+2)[0] = (uint)in[i+0];
-      (out+j+2)[1] = (uint)in[i+1];
-      (out+j+4)[0] = (uint)in[i+1];
-      (out+j+4)[1] = (uint)in[i+3];
-      (out+j+6)[0] = (uint)in[i+3];
-      (out+j+6)[1] = (uint)in[i+2];
-   }
-}
-static void translate_polygon_ubyte2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ubyte*in = (const ubyte*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[0];
-   }
-}
-static void translate_tris_ushort2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=3) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[i];
-   }
-}
-static void translate_tristrip_ushort2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1/*+(i&1)*/];
-      (out+j+2)[0] = (ushort)in[i+1/*+(i&1)*/];
-      (out+j+2)[1] = (ushort)in[i+2/*-(i&1)*/];
-      (out+j+4)[0] = (ushort)in[i+2/*-(i&1)*/];
-      (out+j+4)[1] = (ushort)in[i];
-   }
-}
-static void translate_trifan_ushort2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[0];
-   }
-}
-static void translate_quads_ushort2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=4) { 
-      (out+j)[0] = (ushort)in[i+0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[i+3];
-      (out+j+6)[0] = (ushort)in[i+3];
-      (out+j+6)[1] = (ushort)in[i+0];
-   }
-}
-static void translate_quadstrip_ushort2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=2) { 
-      (out+j)[0] = (ushort)in[i+2];
-      (out+j)[1] = (ushort)in[i+0];
-      (out+j+2)[0] = (ushort)in[i+0];
-      (out+j+2)[1] = (ushort)in[i+1];
-      (out+j+4)[0] = (ushort)in[i+1];
-      (out+j+4)[1] = (ushort)in[i+3];
-      (out+j+6)[0] = (ushort)in[i+3];
-      (out+j+6)[1] = (ushort)in[i+2];
-   }
-}
-static void translate_polygon_ushort2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[0];
-   }
-}
-static void translate_tris_ushort2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=3) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[i];
-   }
-}
-static void translate_tristrip_ushort2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1/*+(i&1)*/];
-      (out+j+2)[0] = (uint)in[i+1/*+(i&1)*/];
-      (out+j+2)[1] = (uint)in[i+2/*-(i&1)*/];
-      (out+j+4)[0] = (uint)in[i+2/*-(i&1)*/];
-      (out+j+4)[1] = (uint)in[i];
-   }
-}
-static void translate_trifan_ushort2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[0];
-   }
-}
-static void translate_quads_ushort2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=4) { 
-      (out+j)[0] = (uint)in[i+0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[i+3];
-      (out+j+6)[0] = (uint)in[i+3];
-      (out+j+6)[1] = (uint)in[i+0];
-   }
-}
-static void translate_quadstrip_ushort2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=2) { 
-      (out+j)[0] = (uint)in[i+2];
-      (out+j)[1] = (uint)in[i+0];
-      (out+j+2)[0] = (uint)in[i+0];
-      (out+j+2)[1] = (uint)in[i+1];
-      (out+j+4)[0] = (uint)in[i+1];
-      (out+j+4)[1] = (uint)in[i+3];
-      (out+j+6)[0] = (uint)in[i+3];
-      (out+j+6)[1] = (uint)in[i+2];
-   }
-}
-static void translate_polygon_ushort2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const ushort*in = (const ushort*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[0];
-   }
-}
-static void translate_tris_uint2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=3) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[i];
-   }
-}
-static void translate_tristrip_uint2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)in[i];
-      (out+j)[1] = (ushort)in[i+1/*+(i&1)*/];
-      (out+j+2)[0] = (ushort)in[i+1/*+(i&1)*/];
-      (out+j+2)[1] = (ushort)in[i+2/*-(i&1)*/];
-      (out+j+4)[0] = (ushort)in[i+2/*-(i&1)*/];
-      (out+j+4)[1] = (ushort)in[i];
-   }
-}
-static void translate_trifan_uint2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[0];
-   }
-}
-static void translate_quads_uint2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=4) { 
-      (out+j)[0] = (ushort)in[i+0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[i+3];
-      (out+j+6)[0] = (ushort)in[i+3];
-      (out+j+6)[1] = (ushort)in[i+0];
-   }
-}
-static void translate_quadstrip_uint2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=2) { 
-      (out+j)[0] = (ushort)in[i+2];
-      (out+j)[1] = (ushort)in[i+0];
-      (out+j+2)[0] = (ushort)in[i+0];
-      (out+j+2)[1] = (ushort)in[i+1];
-      (out+j+4)[0] = (ushort)in[i+1];
-      (out+j+4)[1] = (ushort)in[i+3];
-      (out+j+6)[0] = (ushort)in[i+3];
-      (out+j+6)[1] = (ushort)in[i+2];
-   }
-}
-static void translate_polygon_uint2ushort(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  ushort *out = (ushort*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (ushort)in[0];
-      (out+j)[1] = (ushort)in[i+1];
-      (out+j+2)[0] = (ushort)in[i+1];
-      (out+j+2)[1] = (ushort)in[i+2];
-      (out+j+4)[0] = (ushort)in[i+2];
-      (out+j+4)[1] = (ushort)in[0];
-   }
-}
-static void translate_tris_uint2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i+=3) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[i];
-   }
-}
-static void translate_tristrip_uint2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)in[i];
-      (out+j)[1] = (uint)in[i+1/*+(i&1)*/];
-      (out+j+2)[0] = (uint)in[i+1/*+(i&1)*/];
-      (out+j+2)[1] = (uint)in[i+2/*-(i&1)*/];
-      (out+j+4)[0] = (uint)in[i+2/*-(i&1)*/];
-      (out+j+4)[1] = (uint)in[i];
-   }
-}
-static void translate_trifan_uint2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[0];
-   }
-}
-static void translate_quads_uint2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=4) { 
-      (out+j)[0] = (uint)in[i+0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[i+3];
-      (out+j+6)[0] = (uint)in[i+3];
-      (out+j+6)[1] = (uint)in[i+0];
-   }
-}
-static void translate_quadstrip_uint2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=8, i+=2) { 
-      (out+j)[0] = (uint)in[i+2];
-      (out+j)[1] = (uint)in[i+0];
-      (out+j+2)[0] = (uint)in[i+0];
-      (out+j+2)[1] = (uint)in[i+1];
-      (out+j+4)[0] = (uint)in[i+1];
-      (out+j+4)[1] = (uint)in[i+3];
-      (out+j+6)[0] = (uint)in[i+3];
-      (out+j+6)[1] = (uint)in[i+2];
-   }
-}
-static void translate_polygon_uint2uint(
-    const void * _in,
-    unsigned nr,
-    void *_out )
-{
-  const uint*in = (const uint*)_in;
-  uint *out = (uint*)_out;
-  unsigned i, j;
-  (void)j;
-  for (j = i = 0; j < nr; j+=6, i++) { 
-      (out+j)[0] = (uint)in[0];
-      (out+j)[1] = (uint)in[i+1];
-      (out+j+2)[0] = (uint)in[i+1];
-      (out+j+2)[1] = (uint)in[i+2];
-      (out+j+4)[0] = (uint)in[i+2];
-      (out+j+4)[1] = (uint)in[0];
-   }
-}
-void u_unfilled_init( void )
-{
-  static int firsttime = 1;
-  if (!firsttime) return;
-  firsttime = 0;
-generate_line[OUT_USHORT][PIPE_PRIM_TRIANGLES] = generate_tris_ushort;
-generate_line[OUT_USHORT][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort;
-generate_line[OUT_USHORT][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort;
-generate_line[OUT_USHORT][PIPE_PRIM_QUADS] = generate_quads_ushort;
-generate_line[OUT_USHORT][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort;
-generate_line[OUT_USHORT][PIPE_PRIM_POLYGON] = generate_polygon_ushort;
-generate_line[OUT_UINT][PIPE_PRIM_TRIANGLES] = generate_tris_uint;
-generate_line[OUT_UINT][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint;
-generate_line[OUT_UINT][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint;
-generate_line[OUT_UINT][PIPE_PRIM_QUADS] = generate_quads_uint;
-generate_line[OUT_UINT][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint;
-generate_line[OUT_UINT][PIPE_PRIM_POLYGON] = generate_polygon_uint;
-translate_line[IN_UBYTE][OUT_USHORT][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort;
-translate_line[IN_UBYTE][OUT_USHORT][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort;
-translate_line[IN_UBYTE][OUT_USHORT][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort;
-translate_line[IN_UBYTE][OUT_USHORT][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort;
-translate_line[IN_UBYTE][OUT_USHORT][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort;
-translate_line[IN_UBYTE][OUT_USHORT][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort;
-translate_line[IN_UBYTE][OUT_UINT][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint;
-translate_line[IN_UBYTE][OUT_UINT][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint;
-translate_line[IN_UBYTE][OUT_UINT][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint;
-translate_line[IN_UBYTE][OUT_UINT][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint;
-translate_line[IN_UBYTE][OUT_UINT][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint;
-translate_line[IN_UBYTE][OUT_UINT][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint;
-translate_line[IN_USHORT][OUT_USHORT][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort;
-translate_line[IN_USHORT][OUT_USHORT][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort;
-translate_line[IN_USHORT][OUT_USHORT][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort;
-translate_line[IN_USHORT][OUT_USHORT][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort;
-translate_line[IN_USHORT][OUT_USHORT][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort;
-translate_line[IN_USHORT][OUT_USHORT][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort;
-translate_line[IN_USHORT][OUT_UINT][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint;
-translate_line[IN_USHORT][OUT_UINT][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint;
-translate_line[IN_USHORT][OUT_UINT][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint;
-translate_line[IN_USHORT][OUT_UINT][PIPE_PRIM_QUADS] = translate_quads_ushort2uint;
-translate_line[IN_USHORT][OUT_UINT][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint;
-translate_line[IN_USHORT][OUT_UINT][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint;
-translate_line[IN_UINT][OUT_USHORT][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort;
-translate_line[IN_UINT][OUT_USHORT][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort;
-translate_line[IN_UINT][OUT_USHORT][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort;
-translate_line[IN_UINT][OUT_USHORT][PIPE_PRIM_QUADS] = translate_quads_uint2ushort;
-translate_line[IN_UINT][OUT_USHORT][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort;
-translate_line[IN_UINT][OUT_USHORT][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort;
-translate_line[IN_UINT][OUT_UINT][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint;
-translate_line[IN_UINT][OUT_UINT][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint;
-translate_line[IN_UINT][OUT_UINT][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint;
-translate_line[IN_UINT][OUT_UINT][PIPE_PRIM_QUADS] = translate_quads_uint2uint;
-translate_line[IN_UINT][OUT_UINT][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint;
-translate_line[IN_UINT][OUT_UINT][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint;
-}
-#include "indices/u_unfilled_indices.c"
-- 
cgit v1.2.3


From 2c29a93e875dd96fb08c65ec659efc3eb2d5c15e Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 8 May 2010 20:11:29 -0400
Subject: vl: Get softpipe building again.

---
 configure.ac                                    |  11 +-
 src/gallium/drivers/softpipe/Makefile           |   5 +-
 src/gallium/drivers/softpipe/SConscript         |   1 +
 src/gallium/drivers/softpipe/sp_video_context.c |   2 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c  |   2 +
 src/gallium/winsys/Makefile                     |   2 +-
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c      | 260 +++---------------------
 7 files changed, 35 insertions(+), 248 deletions(-)

diff --git a/configure.ac b/configure.ac
index d54f7e555f6..7062817ea1c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1442,16 +1442,7 @@ AC_ARG_ENABLE([gallium-g3dvl],
     [enable_gallium_g3dvl="$enableval"],
     [enable_gallium_g3dvl=no])
 if test "x$enable_gallium_g3dvl" = xyes; then
-    vl_winsys_dirs=""
-    for dir in $GALLIUM_WINSYS_DIRS; do
-        vl_winsys_dirs="$vl_winsys_dirs g3dvl/$dir"
-    done
-    # Hack, g3dvl dri state tracker is in winsys/g3dvl/dri
-    # and needs to be built before the drm bits
-    if test "$mesa_driver" = dri; then
-        vl_winsys_dirs="g3dvl/dri $vl_winsys_dirs"
-    fi
-    GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS $vl_winsys_dirs"
+    GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS xvmc-softpipe"
 fi
 
 dnl
diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile
index 83f3e4a19b6..3111c49a2c2 100644
--- a/src/gallium/drivers/softpipe/Makefile
+++ b/src/gallium/drivers/softpipe/Makefile
@@ -19,7 +19,7 @@ C_SOURCES = \
 	sp_quad_fs.c \
 	sp_quad_blend.c \
 	sp_screen.c \
-        sp_setup.c \
+	sp_setup.c \
 	sp_state_blend.c \
 	sp_state_clip.c \
 	sp_state_derived.c \
@@ -32,6 +32,7 @@ C_SOURCES = \
 	sp_tex_sample.c \
 	sp_tex_tile_cache.c \
 	sp_tile_cache.c \
-	sp_surface.c
+	sp_surface.c \
+	sp_video_context.c
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript
index b80c6dea93a..c51f449c638 100644
--- a/src/gallium/drivers/softpipe/SConscript
+++ b/src/gallium/drivers/softpipe/SConscript
@@ -34,6 +34,7 @@ softpipe = env.ConvenienceLibrary(
 		'sp_tex_tile_cache.c',
 		'sp_texture.c',
 		'sp_tile_cache.c',
+		'sp_video_context.c',
 	])
 
 Export('softpipe')
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 9d75a1e508f..7024e653e3b 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -33,7 +33,7 @@
 #include <util/u_memory.h>
 #include <util/u_rect.h>
 #include <util/u_video.h>
-#include "sp_winsys.h"
+#include "sp_public.h"
 #include "sp_texture.h"
 
 static void
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index decb2e896cd..9340744fa36 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -437,12 +437,14 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 
    vl_video_bind_drawable(context_priv->vctx, drawable);
 
+#if 0
    vpipe->screen->flush_frontbuffer
    (
       vpipe->screen,
       context_priv->backbuffer,
       vpipe->priv
    );
+#endif
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Pushed surface %p to front buffer.\n", surface);
 
diff --git a/src/gallium/winsys/Makefile b/src/gallium/winsys/Makefile
index e7cb154b84b..bce5b3f9e0d 100644
--- a/src/gallium/winsys/Makefile
+++ b/src/gallium/winsys/Makefile
@@ -2,7 +2,7 @@
 TOP = ../../..
 include $(TOP)/configs/current
 
-SUBDIRS = $(GALLIUM_WINSYS_DIRS) g3dvl
+SUBDIRS = $(GALLIUM_WINSYS_DIRS)
 
 default install clean:
 	@for dir in $(SUBDIRS) ; do \
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 0d183b8d661..1df78e986d6 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -26,16 +26,17 @@
  **************************************************************************/
 
 #include <vl_winsys.h>
-#include <X11/Xutil.h>
-#include <util/u_simple_screen.h>
-#include <pipe/p_state.h>
-#include <util/u_inlines.h>
-#include <util/u_format.h>
+#include <state_tracker/xlib_sw_winsys.h>
+//#include <X11/Xutil.h>
+//#include <util/u_simple_screen.h>
+//#include <pipe/p_state.h>
+//#include <util/u_inlines.h>
+//#include <util/u_format.h>
 #include <util/u_memory.h>
-#include <util/u_math.h>
-#include <softpipe/sp_winsys.h>
+//#include <util/u_math.h>
+#include <softpipe/sp_public.h>
 #include <softpipe/sp_video_context.h>
-#include <softpipe/sp_texture.h>
+//#include <softpipe/sp_texture.h>
 
 /* TODO: Find a good way to calculate this */
 static enum pipe_format VisualToPipe(Visual *visual)
@@ -44,186 +45,10 @@ static enum pipe_format VisualToPipe(Visual *visual)
    return PIPE_FORMAT_B8G8R8X8_UNORM;
 }
 
-struct xsp_pipe_winsys
-{
-   struct pipe_winsys base;
-   Display *display;
-   int screen;
-   XImage *fbimage;
-};
-
-struct xsp_context
-{
-   struct vl_context base;
-
-   Drawable drawable;
-};
-
-struct xsp_buffer
-{
-   struct pipe_buffer base;
-   boolean is_user_buffer;
-   void *data;
-   void *mapped_data;
-};
-
-static struct pipe_buffer* xsp_buffer_create(struct pipe_winsys *pws, unsigned alignment, unsigned usage, unsigned size)
-{
-   struct xsp_buffer *buffer;
-
-   assert(pws);
-
-   buffer = calloc(1, sizeof(struct xsp_buffer));
-   pipe_reference_init(&buffer->base.reference, 1);
-   buffer->base.alignment = alignment;
-   buffer->base.usage = usage;
-   buffer->base.size = size;
-   buffer->data = align_malloc(size, alignment);
-
-   return (struct pipe_buffer*)buffer;
-}
-
-static struct pipe_buffer* xsp_user_buffer_create(struct pipe_winsys *pws, void *data, unsigned size)
-{
-   struct xsp_buffer *buffer;
-
-   assert(pws);
-
-   buffer = calloc(1, sizeof(struct xsp_buffer));
-   pipe_reference_init(&buffer->base.reference, 1);
-   buffer->base.size = size;
-   buffer->is_user_buffer = TRUE;
-   buffer->data = data;
-
-   return (struct pipe_buffer*)buffer;
-}
-
-static void* xsp_buffer_map(struct pipe_winsys *pws, struct pipe_buffer *buffer, unsigned flags)
-{
-   struct xsp_buffer *xsp_buf = (struct xsp_buffer*)buffer;
-
-   assert(pws);
-   assert(buffer);
-
-   xsp_buf->mapped_data = xsp_buf->data;
-
-   return xsp_buf->mapped_data;
-}
-
-static void xsp_buffer_unmap(struct pipe_winsys *pws, struct pipe_buffer *buffer)
-{
-   struct xsp_buffer *xsp_buf = (struct xsp_buffer*)buffer;
-
-   assert(pws);
-   assert(buffer);
-
-   xsp_buf->mapped_data = NULL;
-}
-
-static void xsp_buffer_destroy(struct pipe_buffer *buffer)
-{
-   struct xsp_buffer *xsp_buf = (struct xsp_buffer*)buffer;
-
-   assert(buffer);
-
-   if (!xsp_buf->is_user_buffer)
-      align_free(xsp_buf->data);
-
-   free(xsp_buf);
-}
-
-static struct pipe_buffer* xsp_surface_buffer_create
-(
-   struct pipe_winsys *pws,
-   unsigned width,
-   unsigned height,
-   enum pipe_format format,
-   unsigned usage,
-   unsigned tex_usage,
-   unsigned *stride
-)
-{
-   const unsigned int ALIGNMENT = 1;
-   unsigned nblocksy;
-
-   nblocksy = util_format_get_nblocksy(format, height);
-   *stride = align(util_format_get_stride(format, width), ALIGNMENT);
-
-   return pws->buffer_create(pws, ALIGNMENT, usage,
-                             *stride * nblocksy);
-}
-
-static void xsp_fence_reference(struct pipe_winsys *pws, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence)
-{
-   assert(pws);
-   assert(ptr);
-   assert(fence);
-}
-
-static int xsp_fence_signalled(struct pipe_winsys *pws, struct pipe_fence_handle *fence, unsigned flag)
-{
-   assert(pws);
-   assert(fence);
-
-   return 0;
-}
-
-static int xsp_fence_finish(struct pipe_winsys *pws, struct pipe_fence_handle *fence, unsigned flag)
-{
-   assert(pws);
-   assert(fence);
-
-   return 0;
-}
-
-static void xsp_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surface, void *context_private)
-{
-   struct xsp_pipe_winsys *xsp_winsys;
-   struct xsp_context *xsp_context;
-
-   assert(pws);
-   assert(surface);
-   assert(context_private);
-
-   xsp_winsys = (struct xsp_pipe_winsys*)pws;
-   xsp_context = (struct xsp_context*)context_private;
-   xsp_winsys->fbimage->width = surface->width;
-   xsp_winsys->fbimage->height = surface->height;
-   xsp_winsys->fbimage->bytes_per_line = surface->width * (xsp_winsys->fbimage->bits_per_pixel >> 3);
-   xsp_winsys->fbimage->data = (char*)((struct xsp_buffer *)softpipe_texture(surface->texture)->buffer)->data + surface->offset;
-
-   XPutImage
-   (
-      xsp_winsys->display, xsp_context->drawable,
-      XDefaultGC(xsp_winsys->display, xsp_winsys->screen),
-      xsp_winsys->fbimage, 0, 0, 0, 0,
-      surface->width, surface->height
-   );
-   XFlush(xsp_winsys->display);
-}
-
-static const char* xsp_get_name(struct pipe_winsys *pws)
-{
-   assert(pws);
-   return "X11 SoftPipe";
-}
-
-static void xsp_destroy(struct pipe_winsys *pws)
-{
-   struct xsp_pipe_winsys *xsp_winsys = (struct xsp_pipe_winsys*)pws;
-
-   assert(pws);
-
-   /* XDestroyImage() wants to free the data as well */
-   xsp_winsys->fbimage->data = NULL;
-
-   XDestroyImage(xsp_winsys->fbimage);
-   FREE(xsp_winsys);
-}
-
 Drawable
 vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable)
 {
+#if 0
    struct xsp_context *xsp_context = (struct xsp_context*)vctx;
    Drawable old_drawable;
 
@@ -233,13 +58,15 @@ vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable)
    xsp_context->drawable = drawable;
 
    return old_drawable;
+#endif
+   return None;
 }
 
 struct vl_screen*
 vl_screen_create(Display *display, int screen)
 {
    struct vl_screen *vscreen;
-   struct xsp_pipe_winsys *xsp_winsys;
+   struct sw_winsys *winsys;
 
    assert(display);
 
@@ -247,54 +74,17 @@ vl_screen_create(Display *display, int screen)
    if (!vscreen)
       return NULL;
 
-   xsp_winsys = CALLOC_STRUCT(xsp_pipe_winsys);
-   if (!xsp_winsys) {
+   winsys = xlib_create_sw_winsys(display);
+   if (!winsys) {
       FREE(vscreen);
       return NULL;
    }
 
-   xsp_winsys->base.buffer_create = xsp_buffer_create;
-   xsp_winsys->base.user_buffer_create = xsp_user_buffer_create;
-   xsp_winsys->base.buffer_map = xsp_buffer_map;
-   xsp_winsys->base.buffer_unmap = xsp_buffer_unmap;
-   xsp_winsys->base.buffer_destroy = xsp_buffer_destroy;
-   xsp_winsys->base.surface_buffer_create = xsp_surface_buffer_create;
-   xsp_winsys->base.fence_reference = xsp_fence_reference;
-   xsp_winsys->base.fence_signalled = xsp_fence_signalled;
-   xsp_winsys->base.fence_finish = xsp_fence_finish;
-   xsp_winsys->base.flush_frontbuffer = xsp_flush_frontbuffer;
-   xsp_winsys->base.get_name = xsp_get_name;
-   xsp_winsys->base.destroy = xsp_destroy;
-   xsp_winsys->display = display;
-   xsp_winsys->screen = screen;
-   xsp_winsys->fbimage = XCreateImage
-   (
-      display,
-      XDefaultVisual(display, screen),
-      XDefaultDepth(display, screen),
-      ZPixmap,
-      0,
-      NULL,
-      0, /* Don't know the width and height until flush_frontbuffer */
-      0,
-      32,
-      0
-   );
-
-   if (!xsp_winsys->fbimage) {
-      FREE(xsp_winsys);
-      FREE(vscreen);
-      return NULL;
-   }
-
-   XInitImage(xsp_winsys->fbimage);
-
-   vscreen->pscreen = softpipe_create_screen(&xsp_winsys->base);
+   vscreen->pscreen = softpipe_create_screen(winsys);
 
    if (!vscreen->pscreen) {
+      winsys->destroy(winsys);
       FREE(vscreen);
-      XDestroyImage(xsp_winsys->fbimage);
-      FREE(xsp_winsys);
       return NULL;
    }
 
@@ -318,7 +108,7 @@ vl_video_create(struct vl_screen *vscreen,
                 unsigned width, unsigned height)
 {
    struct pipe_video_context *vpipe;
-   struct xsp_context *xsp_context;
+   struct vl_context *vctx;
 
    assert(vscreen);
    assert(width && height);
@@ -327,23 +117,25 @@ vl_video_create(struct vl_screen *vscreen,
    if (!vpipe)
       return NULL;
 
-   xsp_context = CALLOC_STRUCT(xsp_context);
-   if (!xsp_context) {
+   vctx = CALLOC_STRUCT(vl_context);
+   if (!vctx) {
       vpipe->destroy(vpipe);
       return NULL;
    }
 
-   vpipe->priv = xsp_context;
-   xsp_context->base.vpipe = vpipe;
-   xsp_context->base.vscreen = vscreen;
+   vpipe->priv = vctx;
+   vctx->vpipe = vpipe;
+   vctx->vscreen = vscreen;
 
-   return &xsp_context->base;
+   return vctx;
 }
 
 void vl_video_destroy(struct vl_context *vctx)
 {
    assert(vctx);
 
+#if 0
    vctx->vpipe->destroy(vctx->vpipe);
+#endif
    FREE(vctx);
 }
-- 
cgit v1.2.3


From 0e59cd33e6a38567801c7da541e4caffbd6cccd3 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 23 May 2010 19:56:12 -0400
Subject: vl: Get softpipe working again.

Still segfaults on softpipe->destroy() in the draw module when
freeing a vertex buffer.
---
 src/gallium/auxiliary/vl/vl_compositor.c         | 51 +++++++++++++++++-
 src/gallium/auxiliary/vl/vl_compositor.h         |  3 ++
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 69 ++++++++++++++++++++++--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  3 ++
 src/gallium/state_trackers/xorg/xvmc/surface.c   | 27 ++++------
 src/gallium/targets/Makefile.xvmc                | 61 +++++++++++++++++++++
 src/gallium/targets/xvmc-softpipe/Makefile       | 19 +++++++
 src/gallium/winsys/g3dvl/vl_winsys.h             |  6 ++-
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c       | 46 ++++++++--------
 9 files changed, 240 insertions(+), 45 deletions(-)
 create mode 100644 src/gallium/targets/Makefile.xvmc
 create mode 100644 src/gallium/targets/xvmc-softpipe/Makefile

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 8f21eb68d64..b128af19ba3 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -30,6 +30,8 @@
 #include <pipe/p_context.h>
 #include <util/u_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_keymap.h>
+#include <util/u_sampler.h>
 #include <tgsi/tgsi_ureg.h>
 #include "vl_csc.h"
 
@@ -251,6 +253,21 @@ cleanup_buffers(struct vl_compositor *c)
    pipe_resource_reference(&c->fs_const_buf, NULL);
 }
 
+static void
+texview_map_delete(const struct keymap *map,
+                   const void *key, void *data,
+                   void *user)
+{
+   struct pipe_context *pipe = (struct pipe_context*)user;
+
+   assert(map);
+   assert(key);
+   assert(data);
+   assert(user);
+
+   pipe->sampler_view_destroy(pipe, data);
+}
+
 bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
 {
    unsigned i;
@@ -261,13 +278,22 @@ bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *p
 
    compositor->pipe = pipe;
 
-   if (!init_pipe_state(compositor))
+   compositor->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
+                                             texview_map_delete);
+   if (!compositor->texview_map)
       return false;
+
+   if (!init_pipe_state(compositor)) {
+      util_delete_keymap(compositor->texview_map, compositor->pipe);
+      return false;
+   }
    if (!init_shaders(compositor)) {
+      util_delete_keymap(compositor->texview_map, compositor->pipe);
       cleanup_pipe_state(compositor);
       return false;
    }
    if (!init_buffers(compositor)) {
+      util_delete_keymap(compositor->texview_map, compositor->pipe);
       cleanup_shaders(compositor);
       cleanup_pipe_state(compositor);
       return false;
@@ -288,6 +314,7 @@ void vl_compositor_cleanup(struct vl_compositor *compositor)
 {
    assert(compositor);
 
+   util_delete_keymap(compositor->texview_map, compositor->pipe);
    cleanup_buffers(compositor);
    cleanup_shaders(compositor);
    cleanup_pipe_state(compositor);
@@ -459,8 +486,28 @@ static void draw_layers(struct vl_compositor *c,
    num_rects = gen_data(c, src_surface, src_rect, dst_rect, src_surfaces);
 
    for (i = 0; i < num_rects; ++i) {
-      //c->pipe->set_fragment_sampler_views(c->pipe, 1, &src_surfaces[i]->texture);
+      boolean delete_view = FALSE;
+      struct pipe_sampler_view *surface_view = (struct pipe_sampler_view*)util_keymap_lookup(c->texview_map,
+                                                                                             &src_surfaces[i]);
+      if (!surface_view) {
+         struct pipe_sampler_view templat;
+         u_sampler_view_default_template(&templat, src_surfaces[i]->texture,
+                                         src_surfaces[i]->texture->format);
+         surface_view = c->pipe->create_sampler_view(c->pipe, src_surfaces[i]->texture,
+                                                     &templat);
+         if (!surface_view)
+            return;
+
+         delete_view = !util_keymap_insert(c->texview_map, &src_surfaces[i],
+                                           surface_view, c->pipe);
+      }
+
+      c->pipe->set_fragment_sampler_views(c->pipe, 1, &surface_view);
       c->pipe->draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
+
+      if (delete_view) {
+         c->pipe->sampler_view_destroy(c->pipe, surface_view);
+      }
    }
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 4b0cdd6be5f..026ae559ed7 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -34,6 +34,7 @@
 #include "vl_types.h"
 
 struct pipe_context;
+struct keymap;
 
 #define VL_COMPOSITOR_MAX_LAYERS 16
 
@@ -59,6 +60,8 @@ struct vl_compositor
    struct pipe_video_rect layer_src_rects[VL_COMPOSITOR_MAX_LAYERS];
    struct pipe_video_rect layer_dst_rects[VL_COMPOSITOR_MAX_LAYERS];
    unsigned dirty_layers;
+
+   struct keymap *texview_map;
 };
 
 bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index a4a9320d667..1e9a02c270d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -32,6 +32,7 @@
 #include <util/u_format.h>
 #include <util/u_math.h>
 #include <util/u_memory.h>
+#include <util/u_keymap.h>
 #include <util/u_sampler.h>
 #include <tgsi/tgsi_ureg.h>
 
@@ -1004,6 +1005,33 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
       pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
 }
 
+static struct pipe_sampler_view
+*find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
+{
+   struct pipe_sampler_view *sampler_view;
+   assert(r);
+   assert(surface);
+
+   sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
+   if (!sampler_view) {
+      struct pipe_sampler_view templat;
+      boolean added_to_map;
+
+      u_sampler_view_default_template(&templat, surface->texture,
+                                      surface->texture->format);
+      sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
+                                                  &templat);
+      if (!sampler_view)
+         return NULL;
+
+      added_to_map = util_keymap_insert(r->texview_map, &surface,
+                                        sampler_view, r->pipe);
+      assert(added_to_map);
+   }
+
+   return sampler_view;
+}
+
 static void
 flush(struct vl_mpeg12_mc_renderer *r)
 {
@@ -1051,10 +1079,11 @@ flush(struct vl_mpeg12_mc_renderer *r)
       vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
    }
 
-   if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0*/) {
+   if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->past->texture;
+      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
       r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
@@ -1069,6 +1098,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->past->texture;
+      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
       r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
@@ -1079,10 +1109,11 @@ flush(struct vl_mpeg12_mc_renderer *r)
       vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
    }
 
-   if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0*/) {
+   if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->future->texture;
+      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
       r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
@@ -1097,6 +1128,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->future->texture;
+      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
       r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
@@ -1107,11 +1139,13 @@ flush(struct vl_mpeg12_mc_renderer *r)
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
    }
 
-   if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0*/) {
+   if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
       r->textures.individual.ref[0] = r->past->texture;
       r->textures.individual.ref[1] = r->future->texture;
+      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
+      r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
       r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
@@ -1127,6 +1161,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
       r->textures.individual.ref[0] = r->past->texture;
       r->textures.individual.ref[1] = r->future->texture;
+      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
+      r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
       r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
       r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
@@ -1270,6 +1306,21 @@ grab_macroblock(struct vl_mpeg12_mc_renderer *r,
    ++r->num_macroblocks;
 }
 
+static void
+texview_map_delete(const struct keymap *map,
+                   const void *key, void *data,
+                   void *user)
+{
+   struct pipe_context *pipe = (struct pipe_context*)user;
+
+   assert(map);
+   assert(key);
+   assert(data);
+   assert(user);
+
+   pipe->sampler_view_destroy(pipe, data);
+}
+
 bool
 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            struct pipe_context *pipe,
@@ -1302,13 +1353,22 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->eb_handling = eb_handling;
    renderer->pot_buffers = pot_buffers;
 
-   if (!init_pipe_state(renderer))
+   renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
+                                           texview_map_delete);
+   if (!renderer->texview_map)
+      return false;
+
+   if (!init_pipe_state(renderer)) {
+      util_delete_keymap(renderer->texview_map, renderer->pipe);
       return false;
+   }
    if (!init_shaders(renderer)) {
+      util_delete_keymap(renderer->texview_map, renderer->pipe);
       cleanup_pipe_state(renderer);
       return false;
    }
    if (!init_buffers(renderer)) {
+      util_delete_keymap(renderer->texview_map, renderer->pipe);
       cleanup_shaders(renderer);
       cleanup_pipe_state(renderer);
       return false;
@@ -1333,6 +1393,7 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 
    xfer_buffers_unmap(renderer);
 
+   util_delete_keymap(renderer->texview_map, renderer->pipe);
    cleanup_pipe_state(renderer);
    cleanup_shaders(renderer);
    cleanup_buffers(renderer);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 2e37efbc3cd..85191cf6b02 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -35,6 +35,7 @@
 
 struct pipe_context;
 struct pipe_macroblock;
+struct keymap;
 
 /* A slice is video-width (rounded up to a multiple of macroblock width) x macroblock height */
 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE
@@ -105,6 +106,8 @@ struct vl_mpeg12_mc_renderer
    short *texels[3];
    struct vertex2f surface_tex_inv_size;
    struct vertex2f zero_block[3];
+
+   struct keymap *texview_map;
 };
 
 bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 9340744fa36..985cc1a97a6 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -121,7 +121,7 @@ CreateOrResizeBackBuffer(struct vl_context *vctx, unsigned int width, unsigned i
    template.height0 = height;
    template.depth0 = 1;
    template.usage = PIPE_USAGE_DEFAULT;
-   template.bind = PIPE_BIND_RENDER_TARGET;
+   template.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_BLIT_SOURCE;
    template.flags = 0;
 
    tex = vpipe->screen->resource_create(vpipe->screen, &template);
@@ -129,7 +129,7 @@ CreateOrResizeBackBuffer(struct vl_context *vctx, unsigned int width, unsigned i
       return false;
 
    *backbuffer = vpipe->screen->get_tex_surface(vpipe->screen, tex, 0, 0, 0,
-                                                PIPE_BIND_RENDER_TARGET | PIPE_BIND_BLIT_SOURCE);
+                                                template.bind);
    pipe_resource_reference(&tex, NULL);
 
    if (!*backbuffer)
@@ -366,11 +366,6 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
                       short destx, short desty, unsigned short destw, unsigned short desth,
                       int flags)
 {
-   Window root;
-   int x, y;
-   unsigned int width, height;
-   unsigned int border_width;
-   unsigned int depth;
    struct pipe_video_context *vpipe;
    XvMCSurfacePrivate *surface_priv;
    XvMCContextPrivate *context_priv;
@@ -378,6 +373,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    XvMCContext *context;
    struct pipe_video_rect src_rect = {srcx, srcy, srcw, srch};
    struct pipe_video_rect dst_rect = {destx, desty, destw, desth};
+   void *displaytarget;
+   unsigned width, height;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Displaying surface %p.\n", surface);
 
@@ -386,7 +383,12 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    if (!surface || !surface->privData)
       return XvMCBadSurface;
 
-   if (XGetGeometry(dpy, drawable, &root, &x, &y, &width, &height, &border_width, &depth) == BadDrawable)
+   surface_priv = surface->privData;
+   context = surface_priv->context;
+   context_priv = context->privData;
+
+   displaytarget = vl_displaytarget_get(context_priv->vctx->vscreen, drawable, &width, &height);
+   if (!displaytarget)
       return BadDrawable;
 
    assert(flags == XVMC_TOP_FIELD || flags == XVMC_BOTTOM_FIELD || flags == XVMC_FRAME_PICTURE);
@@ -404,9 +406,6 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    assert(desty + desth - 1 < height);
     */
 
-   surface_priv = surface->privData;
-   context = surface_priv->context;
-   context_priv = context->privData;
    subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;
    vpipe = context_priv->vctx->vpipe;
 
@@ -435,16 +434,12 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
 
-   vl_video_bind_drawable(context_priv->vctx, drawable);
-
-#if 0
    vpipe->screen->flush_frontbuffer
    (
       vpipe->screen,
       context_priv->backbuffer,
-      vpipe->priv
+      displaytarget
    );
-#endif
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Pushed surface %p to front buffer.\n", surface);
 
diff --git a/src/gallium/targets/Makefile.xvmc b/src/gallium/targets/Makefile.xvmc
new file mode 100644
index 00000000000..53044342317
--- /dev/null
+++ b/src/gallium/targets/Makefile.xvmc
@@ -0,0 +1,61 @@
+# This makefile template is used to build libXvMCg3dvl.so
+
+LIBBASENAME = XvMCg3dvl
+LIBNAME = lib$(LIBBASENAME).so
+XVMC_MAJOR = 1
+XVMC_MINOR = 0
+INCLUDES = -I$(TOP)/src/gallium/include \
+	   -I$(TOP)/src/gallium/drivers \
+	   -I$(TOP)/src/gallium/auxiliary \
+	   -I$(TOP)/src/gallium/winsys/g3dvl \
+	   $(DRIVER_INCLUDES)
+DEFINES = -DGALLIUM_TRACE $(DRIVER_DEFINES)
+LIBS = $(EXTRA_LIB_PATH) $(DRIVER_LIBS) -lXvMC -lXv -lX11 -lm
+STATE_TRACKER_LIB = $(TOP)/src/gallium/state_trackers/xorg/xvmc/libxvmctracker.a
+
+# XXX: Hack, XvMC public funcs aren't exported if we link to libxvmctracker.a :(
+OBJECTS = $(C_SOURCES:.c=.o) \
+	  $(ASM_SOURCES:.S=.o) \
+	  $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o
+
+##### RULES #####
+
+.c.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+.S.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+##### TARGETS #####
+
+default: depend symlinks $(TOP)/$(LIB_DIR)/gallium/$(LIBNAME)
+
+$(TOP)/$(LIB_DIR)/gallium/$(LIBNAME): $(OBJECTS) $(PIPE_DRIVERS) $(STATE_TRACKER-LIB) $(TOP)/$(LIB_DIR)/gallium Makefile
+	$(MKLIB) -o $(LIBBASENAME) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+		-major $(XVMC_MAJOR) -minor $(XVMC_MINOR) $(MKLIB_OPTIONS) \
+		-install $(TOP)/$(LIB_DIR)/gallium \
+		$(OBJECTS) $(STATE_TRACKER_LIB) $(PIPE_DRIVERS) $(LIBS)
+
+$(TOP)/$(LIB_DIR)/gallium:
+	mkdir -p $@
+
+depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
+	rm -f depend
+	touch depend
+	$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDES) $(C_SOURCES) \
+		$(ASM_SOURCES) 2> /dev/null
+
+# Emacs tags
+tags:
+	etags `find . -name \*.[ch]` `find ../include`
+
+# Remove .o and backup files
+clean:
+	-rm -f *.o *~ *.so $(SYMLINKS)
+	-rm -f depend depend.bak
+
+#install: $(LIBNAME)
+#	$(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+#	$(MINSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+
+include depend
diff --git a/src/gallium/targets/xvmc-softpipe/Makefile b/src/gallium/targets/xvmc-softpipe/Makefile
new file mode 100644
index 00000000000..1e3ff8ac89c
--- /dev/null
+++ b/src/gallium/targets/xvmc-softpipe/Makefile
@@ -0,0 +1,19 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+DRIVER_DEFINES = -DGALLIUM_SOFTPIPE
+DRIVER_INCLUDES =
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a \
+	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+	$(TOP)/src/gallium/auxiliary/libgallium.a
+
+C_SOURCES = \
+	$(TOP)/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+
+DRIVER_LIBS =
+
+include ../Makefile.xvmc
+
+symlinks:
diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h
index d95e9c58335..c75ff9f32f5 100644
--- a/src/gallium/winsys/g3dvl/vl_winsys.h
+++ b/src/gallium/winsys/g3dvl/vl_winsys.h
@@ -37,6 +37,7 @@ struct pipe_video_context;
 
 struct vl_screen
 {
+   Display *display;
    enum pipe_format format;
    struct pipe_screen *pscreen;
 };
@@ -60,7 +61,8 @@ vl_video_create(struct vl_screen *vscreen,
 
 void vl_video_destroy(struct vl_context *vctx);
 
-Drawable
-vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable);
+void*
+vl_displaytarget_get(struct vl_screen *vscreen, Drawable drawable,
+                     unsigned *width, unsigned *height);
 
 #endif
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 1df78e986d6..e5d4664d4e8 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -27,16 +27,9 @@
 
 #include <vl_winsys.h>
 #include <state_tracker/xlib_sw_winsys.h>
-//#include <X11/Xutil.h>
-//#include <util/u_simple_screen.h>
-//#include <pipe/p_state.h>
-//#include <util/u_inlines.h>
-//#include <util/u_format.h>
 #include <util/u_memory.h>
-//#include <util/u_math.h>
 #include <softpipe/sp_public.h>
 #include <softpipe/sp_video_context.h>
-//#include <softpipe/sp_texture.h>
 
 /* TODO: Find a good way to calculate this */
 static enum pipe_format VisualToPipe(Visual *visual)
@@ -45,21 +38,31 @@ static enum pipe_format VisualToPipe(Visual *visual)
    return PIPE_FORMAT_B8G8R8X8_UNORM;
 }
 
-Drawable
-vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable)
+/* XXX: Not thread-safe */
+static struct xlib_drawable xdraw;
+
+void*
+vl_displaytarget_get(struct vl_screen *vscreen, Drawable drawable,
+                     unsigned *width_out, unsigned *height_out)
 {
-#if 0
-   struct xsp_context *xsp_context = (struct xsp_context*)vctx;
-   Drawable old_drawable;
+   Window root;
+   int x, y;
+   unsigned int width, height;
+   unsigned int border_width;
+   unsigned int depth;
 
-   assert(vctx);
+   assert(vscreen);
+
+   if (XGetGeometry(vscreen->display, drawable, &root, &x, &y, &width, &height, &border_width, &depth) == BadDrawable)
+      return NULL;
 
-   old_drawable = xsp_context->drawable;
-   xsp_context->drawable = drawable;
+   if (width_out) *width_out = width;
+   if (height_out) *height_out = height;
 
-   return old_drawable;
-#endif
-   return None;
+   xdraw.depth = depth;
+   xdraw.drawable = drawable;
+
+   return &xdraw;
 }
 
 struct vl_screen*
@@ -81,14 +84,15 @@ vl_screen_create(Display *display, int screen)
    }
 
    vscreen->pscreen = softpipe_create_screen(winsys);
-
    if (!vscreen->pscreen) {
       winsys->destroy(winsys);
       FREE(vscreen);
       return NULL;
    }
 
-   vscreen->format = VisualToPipe(XDefaultVisual(display, screen));
+   vscreen->display = display;
+   xdraw.visual = XDefaultVisual(display, screen);
+   vscreen->format = VisualToPipe(xdraw.visual);
 
    return vscreen;
 }
@@ -134,7 +138,7 @@ void vl_video_destroy(struct vl_context *vctx)
 {
    assert(vctx);
 
-#if 0
+#if 1
    vctx->vpipe->destroy(vctx->vpipe);
 #endif
    FREE(vctx);
-- 
cgit v1.2.3


From 0a51e8633287b97b4d88e0cb553854535f2fa5e7 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Mon, 24 May 2010 00:14:45 -0400
Subject: vl: Use pipe_screen::video_context_create hook instead of SP ctor.

---
 src/gallium/drivers/softpipe/sp_screen.c   | 2 ++
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 11aa0c4583b..00700974c78 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -37,6 +37,7 @@
 #include "sp_texture.h"
 #include "sp_screen.h"
 #include "sp_context.h"
+#include "sp_video_context.h"
 #include "sp_fence.h"
 #include "sp_public.h"
 
@@ -274,6 +275,7 @@ softpipe_create_screen(struct sw_winsys *winsys)
    screen->base.is_format_supported = softpipe_is_format_supported;
    screen->base.context_create = softpipe_create_context;
    screen->base.flush_frontbuffer = softpipe_flush_frontbuffer;
+   screen->base.video_context_create = sp_video_create;
 
    util_format_s3tc_init();
 
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index e5d4664d4e8..95c2af1e73c 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -116,8 +116,12 @@ vl_video_create(struct vl_screen *vscreen,
 
    assert(vscreen);
    assert(width && height);
+   assert(vscreen->pscreen->video_context_create);
 
-   vpipe = sp_video_create(vscreen->pscreen, profile, chroma_format, width, height);
+   vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen,
+                                                  profile,
+                                                  chroma_format,
+                                                  width, height, NULL);
    if (!vpipe)
       return NULL;
 
-- 
cgit v1.2.3


From 2798958d896dfc9dc6244bc3dce3db8a2f50b589 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Mon, 24 May 2010 13:44:06 -0400
Subject: vl: Decrement sampler view refs instead of destroying them.

Gets rid of a bunch of double frees and the crash at shutdown.
---
 src/gallium/auxiliary/vl/vl_compositor.c         | 6 +++---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index b128af19ba3..8203bf79ee0 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -258,14 +258,14 @@ texview_map_delete(const struct keymap *map,
                    const void *key, void *data,
                    void *user)
 {
-   struct pipe_context *pipe = (struct pipe_context*)user;
+   struct pipe_sampler_view *sv = (struct pipe_sampler_view*)data;
 
    assert(map);
    assert(key);
    assert(data);
    assert(user);
 
-   pipe->sampler_view_destroy(pipe, data);
+   pipe_sampler_view_reference(&sv, NULL);
 }
 
 bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
@@ -506,7 +506,7 @@ static void draw_layers(struct vl_compositor *c,
       c->pipe->draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
 
       if (delete_view) {
-         c->pipe->sampler_view_destroy(c->pipe, surface_view);
+         pipe_sampler_view_reference(&surface_view, NULL);
       }
    }
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 1e9a02c270d..bafe4861fa5 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -675,7 +675,7 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
    pipe_resource_reference(&r->vs_const_buf, NULL);
 
    for (i = 0; i < 3; ++i) {
-      r->pipe->sampler_view_destroy(r->pipe, r->sampler_views.all[i]);
+      pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
       r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
       pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
       pipe_resource_reference(&r->textures.all[i], NULL);
@@ -1311,14 +1311,14 @@ texview_map_delete(const struct keymap *map,
                    const void *key, void *data,
                    void *user)
 {
-   struct pipe_context *pipe = (struct pipe_context*)user;
+   struct pipe_sampler_view *sv = (struct pipe_sampler_view*)data;
 
    assert(map);
    assert(key);
    assert(data);
    assert(user);
 
-   pipe->sampler_view_destroy(pipe, data);
+   pipe_sampler_view_reference(&sv, NULL);
 }
 
 bool
-- 
cgit v1.2.3


From 62074f44bb5944593b65d6203deae6300bead42d Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 29 May 2010 19:20:41 -0400
Subject: st/xvmc: Restore tests removed by merge.

---
 .../state_trackers/xorg/xvmc/tests/test_blocks.c   | 111 ++++++++
 .../state_trackers/xorg/xvmc/tests/test_context.c  | 119 ++++++++
 .../xorg/xvmc/tests/test_rendering.c               | 317 +++++++++++++++++++++
 .../state_trackers/xorg/xvmc/tests/test_surface.c  |  98 +++++++
 .../state_trackers/xorg/xvmc/tests/testlib.c       | 146 ++++++++++
 .../state_trackers/xorg/xvmc/tests/testlib.h       |  69 +++++
 .../state_trackers/xorg/xvmc/tests/xvmc_bench.c    | 300 +++++++++++++++++++
 7 files changed, 1160 insertions(+)
 create mode 100644 src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c
 create mode 100644 src/gallium/state_trackers/xorg/xvmc/tests/test_context.c
 create mode 100644 src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c
 create mode 100644 src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c
 create mode 100644 src/gallium/state_trackers/xorg/xvmc/tests/testlib.c
 create mode 100644 src/gallium/state_trackers/xorg/xvmc/tests/testlib.h
 create mode 100644 src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c

diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c
new file mode 100644
index 00000000000..994e3ca4d14
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_blocks.c
@@ -0,0 +1,111 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <assert.h>
+#include <error.h>
+#include "testlib.h"
+
+int main(int argc, char **argv)
+{
+	const unsigned int	width = 16, height = 16;
+	const unsigned int	min_required_blocks = 1, min_required_macroblocks = 1;
+	const unsigned int	mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+
+	Display			*display;
+	XvPortID		port_num;
+	int			surface_type_id;
+	unsigned int		is_overlay, intra_unsigned;
+	int			colorkey;
+	XvMCContext		context;
+	XvMCSurface		surface;
+	XvMCBlockArray		blocks = {0};
+	XvMCMacroBlockArray	macroblocks = {0};
+
+	display = XOpenDisplay(NULL);
+
+	if (!GetPort
+	(
+		display,
+		width,
+		height,
+		XVMC_CHROMA_FORMAT_420,
+    		mc_types,
+    		2,
+    		&port_num,
+    		&surface_type_id,
+    		&is_overlay,
+    		&intra_unsigned
+	))
+	{
+		XCloseDisplay(display);
+		error(1, 0, "Error, unable to find a good port.\n");
+	}
+
+	if (is_overlay)
+	{
+		Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+		XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+	}
+
+	assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, XVMC_DIRECT, &context) == Success);
+	assert(XvMCCreateSurface(display, &context, &surface) == Success);
+
+	/* Test NULL context */
+	assert(XvMCCreateBlocks(display, NULL, 1, &blocks) == XvMCBadContext);
+	/* Test 0 blocks */
+	assert(XvMCCreateBlocks(display, &context, 0, &blocks) == BadValue);
+	/* Test valid params */
+	assert(XvMCCreateBlocks(display, &context, min_required_blocks, &blocks) == Success);
+	/* Test context id assigned and correct */
+	assert(blocks.context_id == context.context_id);
+	/* Test number of blocks assigned and correct */
+	assert(blocks.num_blocks == min_required_blocks);
+	/* Test block pointer valid */
+	assert(blocks.blocks != NULL);
+	/* Test NULL context */
+	assert(XvMCCreateMacroBlocks(display, NULL, 1, &macroblocks) == XvMCBadContext);
+	/* Test 0 macroblocks */
+	assert(XvMCCreateMacroBlocks(display, &context, 0, &macroblocks) == BadValue);
+	/* Test valid params */
+	assert(XvMCCreateMacroBlocks(display, &context, min_required_macroblocks, &macroblocks) == Success);
+	/* Test context id assigned and correct */
+	assert(macroblocks.context_id == context.context_id);
+	/* Test macroblock pointer valid */
+	assert(macroblocks.macro_blocks != NULL);
+	/* Test valid params */
+	assert(XvMCDestroyMacroBlocks(display, &macroblocks) == Success);
+	/* Test valid params */
+	assert(XvMCDestroyBlocks(display, &blocks) == Success);
+
+	assert(XvMCDestroySurface(display, &surface) == Success);
+	assert(XvMCDestroyContext(display, &context) == Success);
+
+	XvUngrabPort(display, port_num, CurrentTime);
+	XCloseDisplay(display);
+
+	return 0;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_context.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_context.c
new file mode 100644
index 00000000000..3da957c9330
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_context.c
@@ -0,0 +1,119 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <assert.h>
+#include <error.h>
+#include "testlib.h"
+
+/* Exercises argument validation and the reported context fields of XvMCCreateContext()/XvMCDestroyContext(). */
+int main(int argc, char **argv)
+{
+	const unsigned int	width = 16, height = 16;
+	const unsigned int	mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+
+	Display			*display;
+	XvPortID		port_num;
+	int			surface_type_id;
+	unsigned int		is_overlay, intra_unsigned;
+	int			colorkey;
+	XvMCContext		context = {0};
+
+	/* XOpenDisplay() yields NULL when no X server can be reached; stop here rather than hand NULL to GetPort() */
+	display = XOpenDisplay(NULL);
+	if (!display)
+		error(1, 0, "Error, unable to open display.\n");
+
+	if (!GetPort
+	(
+		display,
+		width, height,
+		XVMC_CHROMA_FORMAT_420,
+		mc_types, 2,
+		&port_num, &surface_type_id,
+		&is_overlay, &intra_unsigned
+	))
+	{
+		XCloseDisplay(display);
+		error(1, 0, "Error, unable to find a good port.\n");
+	}
+
+	if (is_overlay)
+	{
+		Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+		XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+	}
+
+	/* Test NULL context */
+	/* XXX: XvMCBadContext not a valid return for XvMCCreateContext in the XvMC API, but openChrome driver returns it */
+	assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, XVMC_DIRECT, NULL) == XvMCBadContext);
+	/* Test invalid port */
+	/* XXX: Success and XvBadPort have the same value, if this call actually gets passed the validation step as of now we'll crash later */
+	assert(XvMCCreateContext(display, -1, surface_type_id, width, height, XVMC_DIRECT, &context) == XvBadPort);
+	/* Test invalid surface */
+	assert(XvMCCreateContext(display, port_num, -1, width, height, XVMC_DIRECT, &context) == BadMatch);
+	/* Test invalid flags */
+	assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, -1, &context) == BadValue);
+	/* Test huge width */
+	assert(XvMCCreateContext(display, port_num, surface_type_id, 16384, height, XVMC_DIRECT, &context) == BadValue);
+	/* Test huge height */
+	assert(XvMCCreateContext(display, port_num, surface_type_id, width, 16384, XVMC_DIRECT, &context) == BadValue);
+	/* Test huge width & height */
+	assert(XvMCCreateContext(display, port_num, surface_type_id, 16384, 16384, XVMC_DIRECT, &context) == BadValue);
+	/* Test valid params */
+	assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, XVMC_DIRECT, &context) == Success);
+	/* Test context id assigned */
+	assert(context.context_id != 0);
+	/* Test surface type id assigned and correct */
+	assert(context.surface_type_id == surface_type_id);
+	/* Test width & height assigned and correct */
+	assert(context.width == width && context.height == height);
+	/* Test port assigned and correct */
+	assert(context.port == port_num);
+	/* Test flags assigned and correct */
+	assert(context.flags == XVMC_DIRECT);
+	/* Test NULL context */
+	assert(XvMCDestroyContext(display, NULL) == XvMCBadContext);
+	/* Test valid params */
+	assert(XvMCDestroyContext(display, &context) == Success);
+	/* Test awkward but valid width */
+	assert(XvMCCreateContext(display, port_num, surface_type_id, width + 1, height, XVMC_DIRECT, &context) == Success);
+	assert(context.width >= width + 1);
+	assert(XvMCDestroyContext(display, &context) == Success);
+	/* Test awkward but valid height */
+	assert(XvMCCreateContext(display, port_num, surface_type_id, width, height + 1, XVMC_DIRECT, &context) == Success);
+	assert(context.height >= height + 1);
+	assert(XvMCDestroyContext(display, &context) == Success);
+	/* Test awkward but valid width & height */
+	assert(XvMCCreateContext(display, port_num, surface_type_id, width + 1, height + 1, XVMC_DIRECT, &context) == Success);
+	assert(context.width >= width + 1 && context.height >= height + 1);
+	assert(XvMCDestroyContext(display, &context) == Success);
+
+	XvUngrabPort(display, port_num, CurrentTime);
+	XCloseDisplay(display);
+
+	return 0;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c
new file mode 100644
index 00000000000..6058783a798
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_rendering.c
@@ -0,0 +1,317 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <error.h>
+#include "testlib.h"
+
+#define BLOCK_WIDTH			8
+#define BLOCK_HEIGHT			8
+#define BLOCK_SIZE			(BLOCK_WIDTH * BLOCK_HEIGHT)
+#define MACROBLOCK_WIDTH		16
+#define MACROBLOCK_HEIGHT		16
+#define MACROBLOCK_WIDTH_IN_BLOCKS	(MACROBLOCK_WIDTH / BLOCK_WIDTH)
+#define MACROBLOCK_HEIGHT_IN_BLOCKS	(MACROBLOCK_HEIGHT / BLOCK_HEIGHT)
+#define BLOCKS_PER_MACROBLOCK		6
+
+#define INPUT_WIDTH			16
+#define INPUT_HEIGHT			16
+#define INPUT_WIDTH_IN_MACROBLOCKS	(INPUT_WIDTH / MACROBLOCK_WIDTH)
+#define INPUT_HEIGHT_IN_MACROBLOCKS	(INPUT_HEIGHT / MACROBLOCK_HEIGHT)
+#define NUM_MACROBLOCKS			(INPUT_WIDTH_IN_MACROBLOCKS * INPUT_HEIGHT_IN_MACROBLOCKS)
+
+#define DEFAULT_OUTPUT_WIDTH		INPUT_WIDTH
+#define DEFAULT_OUTPUT_HEIGHT		INPUT_HEIGHT
+#define DEFAULT_ACCEPTABLE_ERR		0.01
+
+void ParseArgs(int argc, char **argv, unsigned int *output_width, unsigned int *output_height, double *acceptable_error, int *prompt);
+void Gradient(short *block, unsigned int start, unsigned int stop, int horizontal);
+
+/*
+ * Parse the command line into the four output parameters. Each one starts at
+ * its default (DEFAULT_OUTPUT_WIDTH, DEFAULT_OUTPUT_HEIGHT,
+ * DEFAULT_ACCEPTABLE_ERR, prompting enabled). An unknown option, a missing
+ * value or a value that does not parse prints the usage text through error()
+ * and terminates the process with status 1.
+ */
+void ParseArgs(int argc, char **argv, unsigned int *output_width, unsigned int *output_height, double *acceptable_error, int *prompt)
+{
+	int fail = 0;
+	int i;
+
+	*output_width = DEFAULT_OUTPUT_WIDTH;
+	*output_height = DEFAULT_OUTPUT_HEIGHT;
+	*acceptable_error = DEFAULT_ACCEPTABLE_ERR;
+	*prompt = 1;
+
+	for (i = 1; i < argc && !fail; ++i)
+	{
+		/* ++i steps onto the option's value; running off the end of argv counts as a failure */
+		if (!strcmp(argv[i], "-w"))
+			fail = ++i >= argc || sscanf(argv[i], "%u", output_width) != 1;
+		else if (!strcmp(argv[i], "-h"))
+			fail = ++i >= argc || sscanf(argv[i], "%u", output_height) != 1;
+		else if (!strcmp(argv[i], "-e"))
+			fail = ++i >= argc || sscanf(argv[i], "%lf", acceptable_error) != 1;
+		else if (!strcmp(argv[i], "-n"))
+			*prompt = 0;
+		else
+			fail = 1;
+	}
+
+	/* error() does not return when its status argument is non-zero */
+	if (fail)
+		error
+		(
+			1, 0,
+			"Bad argument.\n"
+			"\n"
+			"Usage: %s [options]\n"
+			"\t-w <width>\tOutput width\n"
+			"\t-h <height>\tOutput height\n"
+			"\t-e <error>\tAcceptable margin of error per pixel, from 0 to 1\n"
+			"\t-n\tDon't prompt for quit\n",
+			argv[0]
+		);
+}
+
+/*
+ * Fill one BLOCK_WIDTH x BLOCK_HEIGHT block with a linear ramp from start to
+ * stop, running left-to-right when horizontal is non-zero and top-to-bottom
+ * otherwise. stop may be smaller than start, which gives a descending ramp.
+ */
+void Gradient(short *block, unsigned int start, unsigned int stop, int horizontal)
+{
+	/* Signed span: an unsigned difference would wrap around when stop < start */
+	const float span = (float)stop - (float)start;
+	unsigned int x, y;
+	for (y = 0; y < BLOCK_HEIGHT; ++y)
+		for (x = 0; x < BLOCK_WIDTH; ++x)
+		{
+			/* Position along the ramp axis: 0 at the first sample, 1 at the last */
+			const float t = horizontal ? x / (float)(BLOCK_WIDTH - 1) : y / (float)(BLOCK_HEIGHT - 1);
+			block[y * BLOCK_WIDTH + x] = (short)((float)start + span * t);
+		}
+}
+
+/* Renders one intra-coded gradient frame through XvMC and, unless -n was given, displays it until a key is pressed. */
+int main(int argc, char **argv)
+{
+	unsigned int		output_width;
+	unsigned int		output_height;
+	double			acceptable_error;
+	int			prompt;
+	Display			*display;
+	Window			root, window;
+	const unsigned int	mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+	XvPortID		port_num;
+	int			surface_type_id;
+	unsigned int		is_overlay, intra_unsigned;
+	int			colorkey = 0;	/* doubles as the window background, so non-overlay ports need a defined value */
+	XvMCContext		context;
+	XvMCSurface		surface;
+	XvMCBlockArray		block_array;
+	XvMCMacroBlockArray	mb_array;
+	int			mbx, mby, bx, by;
+	XvMCMacroBlock		*mb;
+	short			*blocks;
+	int			quit = 0;
+
+	ParseArgs(argc, argv, &output_width, &output_height, &acceptable_error, &prompt);
+
+	display = XOpenDisplay(NULL);
+	if (!display)
+		error(1, 0, "Error, unable to open display.\n");
+
+	if (!GetPort
+	(
+		display,
+		INPUT_WIDTH, INPUT_HEIGHT,
+		XVMC_CHROMA_FORMAT_420,
+		mc_types, 2,
+		&port_num, &surface_type_id,
+		&is_overlay, &intra_unsigned
+	))
+	{
+		XCloseDisplay(display);
+		error(1, 0, "Error, unable to find a good port.\n");
+	}
+
+	if (is_overlay)
+	{
+		Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+		XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+	}
+
+	root = XDefaultRootWindow(display);
+	window = XCreateSimpleWindow(display, root, 0, 0, output_width, output_height, 0, 0, colorkey);
+
+	assert(XvMCCreateContext(display, port_num, surface_type_id, INPUT_WIDTH, INPUT_HEIGHT, XVMC_DIRECT, &context) == Success);
+	assert(XvMCCreateSurface(display, &context, &surface) == Success);
+	assert(XvMCCreateBlocks(display, &context, NUM_MACROBLOCKS * BLOCKS_PER_MACROBLOCK, &block_array) == Success);
+	assert(XvMCCreateMacroBlocks(display, &context, NUM_MACROBLOCKS, &mb_array) == Success);
+
+	mb = mb_array.macro_blocks;
+	blocks = block_array.blocks;
+
+	/* Each macroblock consumes BLOCKS_PER_MACROBLOCK blocks: the first loop nest writes four, the second writes two */
+	for (mby = 0; mby < INPUT_HEIGHT_IN_MACROBLOCKS; ++mby)
+		for (mbx = 0; mbx < INPUT_WIDTH_IN_MACROBLOCKS; ++mbx)
+		{
+			mb->x = mbx;
+			mb->y = mby;
+			mb->macroblock_type = XVMC_MB_TYPE_INTRA;
+			/*mb->motion_type = ;*/
+			/*mb->motion_vertical_field_select = ;*/
+			mb->dct_type = XVMC_DCT_TYPE_FRAME;
+			/*mb->PMV[0][0][0] = ;
+			mb->PMV[0][0][1] = ;
+			mb->PMV[0][1][0] = ;
+			mb->PMV[0][1][1] = ;
+			mb->PMV[1][0][0] = ;
+			mb->PMV[1][0][1] = ;
+			mb->PMV[1][1][0] = ;
+			mb->PMV[1][1][1] = ;*/
+			mb->index = (mby * INPUT_WIDTH_IN_MACROBLOCKS + mbx) * BLOCKS_PER_MACROBLOCK;
+			mb->coded_block_pattern = 0x3F;
+
+			mb++;
+
+			for (by = 0; by < MACROBLOCK_HEIGHT_IN_BLOCKS; ++by)
+				for (bx = 0; bx < MACROBLOCK_WIDTH_IN_BLOCKS; ++bx)
+				{
+					const int start = 16, stop = 235, range = stop - start;
+
+					Gradient
+					(
+						blocks,
+						(short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH) / (float)(INPUT_WIDTH - 1))),
+						(short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH + BLOCK_WIDTH - 1) / (float)(INPUT_WIDTH - 1))),
+						1
+					);
+
+					blocks += BLOCK_SIZE;
+				}
+
+			for (by = 0; by < MACROBLOCK_HEIGHT_IN_BLOCKS / 2; ++by)
+				for (bx = 0; bx < MACROBLOCK_WIDTH_IN_BLOCKS / 2; ++bx)
+				{
+					const int start = 16, stop = 240, range = stop - start;
+
+					Gradient
+					(
+						blocks,
+						(short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH) / (float)(INPUT_WIDTH - 1))),
+						(short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH + BLOCK_WIDTH - 1) / (float)(INPUT_WIDTH - 1))),
+						1
+					);
+
+					blocks += BLOCK_SIZE;
+
+					Gradient
+					(
+						blocks,
+						(short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH) / (float)(INPUT_WIDTH - 1))),
+						(short)(start + range * ((mbx * MACROBLOCK_WIDTH + bx * BLOCK_WIDTH + BLOCK_WIDTH - 1) / (float)(INPUT_WIDTH - 1))),
+						1
+					);
+
+					blocks += BLOCK_SIZE;
+				}
+		}
+
+	XSelectInput(display, window, ExposureMask | KeyPressMask);
+	XMapWindow(display, window);
+	XSync(display, 0);
+
+	/* Test NULL context */
+	assert(XvMCRenderSurface(display, NULL, XVMC_FRAME_PICTURE, &surface, NULL, NULL, 0, NUM_MACROBLOCKS, 0, &mb_array, &block_array) == XvMCBadContext);
+	/* Test NULL surface */
+	assert(XvMCRenderSurface(display, &context, XVMC_FRAME_PICTURE, NULL, NULL, NULL, 0, NUM_MACROBLOCKS, 0, &mb_array, &block_array) == XvMCBadSurface);
+	/* Test bad picture structure */
+	assert(XvMCRenderSurface(display, &context, 0, &surface, NULL, NULL, 0, NUM_MACROBLOCKS, 0, &mb_array, &block_array) == BadValue);
+	/* Test valid params */
+	assert(XvMCRenderSurface(display, &context, XVMC_FRAME_PICTURE, &surface, NULL, NULL, 0, NUM_MACROBLOCKS, 0, &mb_array, &block_array) == Success);
+
+	/* Test NULL surface */
+	assert(XvMCPutSurface(display, NULL, window, 0, 0, INPUT_WIDTH, INPUT_HEIGHT, 0, 0, output_width, output_height, XVMC_FRAME_PICTURE) == XvMCBadSurface);
+	/* Test bad window */
+	/* XXX: X halts with a bad drawable for some reason, doesn't return BadDrawable as expected */
+	/*assert(XvMCPutSurface(display, &surface, 0, 0, 0, width, height, 0, 0, width, height, XVMC_FRAME_PICTURE) == BadDrawable);*/
+
+	if (prompt)
+	{
+		puts("Press any button to quit...");
+
+		/* XNextEvent() blocks until an event arrives, so the loop does not spin on an empty queue */
+		while (!quit)
+		{
+			XEvent event;
+
+			XNextEvent(display, &event);
+
+			switch (event.type)
+			{
+				case Expose:
+				{
+					/* Test valid params */
+					assert
+					(
+						XvMCPutSurface
+						(
+							display, &surface, window,
+							0, 0, INPUT_WIDTH, INPUT_HEIGHT,
+							0, 0, output_width, output_height,
+							XVMC_FRAME_PICTURE
+						) == Success
+					);
+					break;
+				}
+				case KeyPress:
+				{
+					quit = 1;
+					break;
+				}
+				default:
+					break;
+			}
+		}
+	}
+
+	assert(XvMCDestroyBlocks(display, &block_array) == Success);
+	assert(XvMCDestroyMacroBlocks(display, &mb_array) == Success);
+	assert(XvMCDestroySurface(display, &surface) == Success);
+	assert(XvMCDestroyContext(display, &context) == Success);
+
+	XvUngrabPort(display, port_num, CurrentTime);
+	XDestroyWindow(display, window);
+	XCloseDisplay(display);
+
+	return 0;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c b/src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c
new file mode 100644
index 00000000000..b65eb265c0a
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/test_surface.c
@@ -0,0 +1,98 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <assert.h>
+#include <error.h>
+#include "testlib.h"
+
+/* Exercises argument validation and the reported surface fields of XvMCCreateSurface()/XvMCDestroySurface(). */
+int main(int argc, char **argv)
+{
+	const unsigned int	width = 16, height = 16;
+	const unsigned int	mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+
+	Display			*display;
+	XvPortID		port_num;
+	int			surface_type_id;
+	unsigned int		is_overlay, intra_unsigned;
+	int			colorkey;
+	XvMCContext		context;
+	XvMCSurface		surface = {0};
+
+	/* XOpenDisplay() yields NULL when no X server can be reached; stop here rather than hand NULL to GetPort() */
+	display = XOpenDisplay(NULL);
+	if (!display)
+		error(1, 0, "Error, unable to open display.\n");
+
+	if (!GetPort
+	(
+		display,
+		width, height,
+		XVMC_CHROMA_FORMAT_420,
+		mc_types, 2,
+		&port_num, &surface_type_id,
+		&is_overlay, &intra_unsigned
+	))
+	{
+		XCloseDisplay(display);
+		error(1, 0, "Error, unable to find a good port.\n");
+	}
+
+	if (is_overlay)
+	{
+		Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+		XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+	}
+
+	assert(XvMCCreateContext(display, port_num, surface_type_id, width, height, XVMC_DIRECT, &context) == Success);
+
+	/* Test NULL context */
+	assert(XvMCCreateSurface(display, NULL, &surface) == XvMCBadContext);
+	/* Test NULL surface */
+	assert(XvMCCreateSurface(display, &context, NULL) == XvMCBadSurface);
+	/* Test valid params */
+	assert(XvMCCreateSurface(display, &context, &surface) == Success);
+	/* Test surface id assigned */
+	assert(surface.surface_id != 0);
+	/* Test context id assigned and correct */
+	assert(surface.context_id == context.context_id);
+	/* Test surface type id assigned and correct */
+	assert(surface.surface_type_id == surface_type_id);
+	/* Test width & height assigned and correct */
+	assert(surface.width == width && surface.height == height);
+	/* Test valid params */
+	assert(XvMCDestroySurface(display, &surface) == Success);
+	/* Test NULL surface */
+	assert(XvMCDestroySurface(display, NULL) == XvMCBadSurface);
+
+	assert(XvMCDestroyContext(display, &context) == Success);
+
+	XvUngrabPort(display, port_num, CurrentTime);
+	XCloseDisplay(display);
+
+	return 0;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/testlib.c b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.c
new file mode 100644
index 00000000000..142c09bb590
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.c
@@ -0,0 +1,146 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "testlib.h"
+#include <stdio.h>
+
+/*
+void test(int pred, const char *pred_string, const char *doc_string, const char *file, unsigned int line)
+{
+	fputs(doc_string, stderr);
+	if (!pred)
+		fprintf(stderr, " FAIL!\n\t\"%s\" at %s:%u\n", pred_string, file, line);
+	else
+		fputs(" PASS!\n", stderr);
+}
+*/
+
+/* Grabs the first Xv port that can handle the requested surface; the parameter contract is described in testlib.h. */
+int GetPort
+(
+	Display *display,
+	unsigned int width,
+	unsigned int height,
+	unsigned int chroma_format,
+	const unsigned int *mc_types,
+	unsigned int num_mc_types,
+	XvPortID *port_id,
+	int *surface_type_id,
+	unsigned int *is_overlay,
+	unsigned int *intra_unsigned
+)
+{
+	XvAdaptorInfo	*adaptors;
+	unsigned int	adaptor_count;
+	int		type_count;
+	int		event_base, error_base;
+	unsigned int	a, t, m, p;
+	unsigned int	grabbed = 0;
+
+	if (!XvMCQueryExtension(display, &event_base, &error_base))
+		return 0;
+	if (XvQueryAdaptors(display, XDefaultRootWindow(display), &adaptor_count, &adaptors) != Success)
+		return 0;
+
+	for (a = 0; a < adaptor_count && !grabbed; ++a)
+	{
+		XvMCSurfaceInfo *types;
+
+		if (!(adaptors[a].type & XvImageMask))
+			continue;
+
+		types = XvMCListSurfaceTypes(display, adaptors[a].base_id, &type_count);
+		if (!types)
+			continue;
+
+		for (t = 0; t < type_count && !grabbed; ++t)
+		{
+			/* Skip surface types with the wrong chroma format or too small a maximum size */
+			if (types[t].chroma_format != chroma_format)
+				continue;
+			if (types[t].max_width < width || types[t].max_height < height)
+				continue;
+
+			for (m = 0; m < num_mc_types && !grabbed; ++m)
+			{
+				if (types[t].mc_type != mc_types[m])
+					continue;
+
+				for (p = 0; p < adaptors[a].num_ports && !grabbed; ++p)
+				{
+					if (XvGrabPort(display, adaptors[a].base_id + p, CurrentTime) != Success)
+						continue;
+
+					*port_id = adaptors[a].base_id + p;
+					*surface_type_id = types[t].surface_type_id;
+					*is_overlay = types[t].flags & XVMC_OVERLAID_SURFACE;
+					*intra_unsigned = types[t].flags & XVMC_INTRA_UNSIGNED;
+					grabbed = 1;
+				}
+			}
+		}
+
+		XFree(types);
+	}
+
+	XvFreeAdaptorInfo(adaptors);
+
+	return grabbed;
+}
+
+unsigned int align(unsigned int value, unsigned int alignment)
+{
+	return (value + (alignment - 1)) & (0u - alignment); /* rounds up; the mask is only meaningful for power-of-two alignments */
+}
+
+/*
+ * From the glibc manual: stores x - y in result and returns 1 when that difference is negative; y is adjusted in place.
+ */
+int timeval_subtract(struct timeval *result, struct timeval *x, struct timeval *y)
+{
+	/* Move whole seconds from y's microseconds into y's seconds when x has fewer microseconds. */
+	if (y->tv_usec > x->tv_usec)
+	{
+		int borrow = (y->tv_usec - x->tv_usec) / 1000000 + 1;
+		y->tv_sec += borrow;
+		y->tv_usec -= 1000000 * borrow;
+	}
+	/* Fold a microsecond gap of more than one second back into y's seconds. */
+	if (x->tv_usec - y->tv_usec > 1000000)
+	{
+		int surplus = (x->tv_usec - y->tv_usec) / 1000000;
+		y->tv_sec -= surplus;
+		y->tv_usec += 1000000 * surplus;
+	}
+
+	/* After the adjustments above the microsecond difference is not negative. */
+	result->tv_usec = x->tv_usec - y->tv_usec;
+	result->tv_sec = x->tv_sec - y->tv_sec;
+
+	/* Non-zero tells the caller that the difference is negative. */
+	return y->tv_sec > x->tv_sec;
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/testlib.h b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.h
new file mode 100644
index 00000000000..0438e52928b
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/testlib.h
@@ -0,0 +1,69 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef testlib_h
+#define testlib_h
+
+/*
+#define TEST(pred, doc)	test(pred, #pred, doc, __FILE__, __LINE__)
+
+void test(int pred, const char *pred_string, const char *doc_string, const char *file, unsigned int line);
+*/
+
+#include <sys/time.h>
+#include <X11/Xlib.h>
+#include <X11/extensions/XvMClib.h>
+
+/* Returns 1 after grabbing a suitable port (callers release it with XvUngrabPort), 0 if none could be found or grabbed.
+ * display: IN			A valid X display
+ * width, height: IN		Surface size that the port must display
+ * chroma_format: IN		Chroma format that the port must display
+ * mc_types, num_mc_types: IN	List of acceptable MC types; the first port that can be grabbed for a surface type using any of them is returned
+ * port_id: OUT			Your port's ID
+ * surface_type_id: OUT		Your port's surface ID
+ * is_overlay: OUT		If non-zero, port uses overlay surfaces, you need to set a colorkey
+ * intra_unsigned: OUT		If non-zero, port uses unsigned values for intra-coded blocks
+ */
+int GetPort
+(
+	Display *display,
+	unsigned int width,
+	unsigned int height,
+	unsigned int chroma_format,
+	const unsigned int *mc_types,
+	unsigned int num_mc_types,
+	XvPortID *port_id,
+	int *surface_type_id,
+	unsigned int *is_overlay,
+	unsigned int *intra_unsigned
+);
+
+unsigned int align(unsigned int value, unsigned int alignment); /* rounds value up to a multiple of alignment; alignment must be a power of two */
+
+int timeval_subtract(struct timeval *result, struct timeval *x, struct timeval *y); /* result = x - y, returns 1 if negative; y may be modified */
+
+#endif
diff --git a/src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c b/src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c
new file mode 100644
index 00000000000..bf94d856234
--- /dev/null
+++ b/src/gallium/state_trackers/xorg/xvmc/tests/xvmc_bench.c
@@ -0,0 +1,300 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <error.h>
+#include <sys/time.h>
+#include "testlib.h"
+
+#define MACROBLOCK_WIDTH		16
+#define MACROBLOCK_HEIGHT		16
+#define BLOCKS_PER_MACROBLOCK		6
+
+#define DEFAULT_INPUT_WIDTH		720
+#define DEFAULT_INPUT_HEIGHT		480
+#define DEFAULT_REPS			100
+
+#define PIPELINE_STEP_MC		1
+#define PIPELINE_STEP_CSC		2
+#define PIPELINE_STEP_SWAP		4
+
+#define MB_TYPE_I			1
+#define MB_TYPE_P			2
+#define MB_TYPE_B			4
+
+struct Config
+{
+	unsigned int input_width;	/* -iw, defaults to DEFAULT_INPUT_WIDTH */
+	unsigned int input_height;	/* -ih, defaults to DEFAULT_INPUT_HEIGHT */
+	unsigned int output_width;	/* -ow, ParseArgs() substitutes input_width when left at 0 */
+	unsigned int output_height;	/* -oh, ParseArgs() substitutes input_height when left at 0 */
+	unsigned int pipeline;		/* -p, bitmask of PIPELINE_STEP_* values */
+	unsigned int mb_types;		/* -mb, bitmask of MB_TYPE_* values */
+	unsigned int reps;		/* -r, defaults to DEFAULT_REPS */
+};
+
+void ParseArgs(int argc, char **argv, struct Config *config);
+
+/*
+ * Fill *config from the command line.
+ *
+ * Every option is followed by exactly one value: -iw/-ih/-ow/-oh/-r take an
+ * unsigned integer, while -p and -mb take a comma-separated list whose entries
+ * are OR-ed into config->pipeline and config->mb_types respectively. Options
+ * that are not given keep their defaults: DEFAULT_INPUT_WIDTH x
+ * DEFAULT_INPUT_HEIGHT input, DEFAULT_REPS repetitions, an output size equal
+ * to the input size, and every pipeline step and macroblock type enabled.
+ *
+ * An unknown option, a missing value or a value that does not parse prints the
+ * usage text through error() and terminates the process with status 1.
+ */
+void ParseArgs(int argc, char **argv, struct Config *config)
+{
+	int fail = 0;
+	int i;
+
+	config->input_width = DEFAULT_INPUT_WIDTH;
+	config->input_height = DEFAULT_INPUT_HEIGHT;
+	config->output_width = 0;
+	config->output_height = 0;
+	config->pipeline = 0;
+	config->mb_types = 0;
+	config->reps = DEFAULT_REPS;
+
+	for (i = 1; i < argc && !fail; ++i)
+	{
+		const char *option = argv[i];
+		/* NULL when the option is the last argument and therefore has no value */
+		char *value = (++i < argc) ? argv[i] : NULL;
+
+		if (!value)
+			fail = 1;
+		else if (!strcmp(option, "-iw"))
+			fail = sscanf(value, "%u", &config->input_width) != 1;
+		else if (!strcmp(option, "-ih"))
+			fail = sscanf(value, "%u", &config->input_height) != 1;
+		else if (!strcmp(option, "-ow"))
+			fail = sscanf(value, "%u", &config->output_width) != 1;
+		else if (!strcmp(option, "-oh"))
+			fail = sscanf(value, "%u", &config->output_height) != 1;
+		else if (!strcmp(option, "-r"))
+			fail = sscanf(value, "%u", &config->reps) != 1;
+		else if (!strcmp(option, "-p"))
+		{
+			char *token;
+
+			for (token = strtok(value, ","); token && !fail; token = strtok(NULL, ","))
+			{
+				if (!strcmp(token, "mc"))
+					config->pipeline |= PIPELINE_STEP_MC;
+				else if (!strcmp(token, "csc"))
+					config->pipeline |= PIPELINE_STEP_CSC;
+				/* The usage text advertises "swap"; "swp" stays accepted for compatibility */
+				else if (!strcmp(token, "swap") || !strcmp(token, "swp"))
+					config->pipeline |= PIPELINE_STEP_SWAP;
+				else
+					fail = 1;
+			}
+		}
+		else if (!strcmp(option, "-mb"))
+		{
+			char *token;
+
+			for (token = strtok(value, ","); token && !fail; token = strtok(NULL, ","))
+			{
+				if (!strcmp(token, "i"))
+					config->mb_types |= MB_TYPE_I;
+				else if (!strcmp(token, "p"))
+					config->mb_types |= MB_TYPE_P;
+				else if (!strcmp(token, "b"))
+					config->mb_types |= MB_TYPE_B;
+				else
+					fail = 1;
+			}
+		}
+		else
+			fail = 1;
+	}
+
+	if (fail)
+		error
+		(
+			1, 0,
+			"Bad argument.\n"
+			"\n"
+			"Usage: %s [options]\n"
+			"\t-iw <width>\tInput width\n"
+			"\t-ih <height>\tInput height\n"
+			"\t-ow <width>\tOutput width\n"
+			"\t-oh <height>\tOutput height\n"
+			"\t-p <pipeline>\tPipeline to test\n"
+			"\t-mb <mb type>\tMacroBlock types to use\n"
+			"\t-r <reps>\tRepetitions\n\n"
+			"\tPipeline steps: mc,csc,swap\n"
+			"\tMB types: i,p,b\n",
+			argv[0]
+		);
+
+	/* Anything still zero was not requested explicitly, so apply the defaults */
+	if (config->output_width == 0)
+		config->output_width = config->input_width;
+	if (config->output_height == 0)
+		config->output_height = config->input_height;
+	if (!config->pipeline)
+		config->pipeline = PIPELINE_STEP_MC | PIPELINE_STEP_CSC | PIPELINE_STEP_SWAP;
+	if (!config->mb_types)
+		config->mb_types = MB_TYPE_I | MB_TYPE_P | MB_TYPE_B;
+}
+
+/* Exit with a diagnostic unless an XvMC call returned Success; unlike assert(), the call is still made when NDEBUG is defined. */
+static void CheckStatus(int status, const char *call)
+{
+	if (status != Success)
+		error(1, 0, "Error, %s failed with status %d.\n", call, status);
+}
+
+/* Benchmark driver: creates the XvMC objects, times config.reps passes of the selected pipeline steps and prints the rate. */
+int main(int argc, char **argv)
+{
+	struct Config		config;
+	Display			*display;
+	Window			root, window;
+	const unsigned int	mc_types[2] = {XVMC_MOCOMP | XVMC_MPEG_2, XVMC_IDCT | XVMC_MPEG_2};
+	XvPortID		port_num;
+	int			surface_type_id;
+	unsigned int		is_overlay, intra_unsigned;
+	int			colorkey = 0;	/* only queried for overlay ports, but always used as the window background */
+	XvMCContext		context;
+	XvMCSurface		surface;
+	XvMCBlockArray		block_array;
+	XvMCMacroBlockArray	mb_array;
+	unsigned int		mbw, mbh;
+	unsigned int		mbx, mby;
+	unsigned int		reps;
+	struct timeval		start, stop, diff;
+	double			diff_secs;
+
+	ParseArgs(argc, argv, &config);
+
+	mbw = align(config.input_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
+	mbh = align(config.input_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
+
+	display = XOpenDisplay(NULL);
+	if (!display)
+		error(1, 0, "Error, unable to open display.\n");
+
+	if (!GetPort
+	(
+		display,
+		config.input_width,
+		config.input_height,
+		XVMC_CHROMA_FORMAT_420,
+		mc_types,
+		2,
+		&port_num,
+		&surface_type_id,
+		&is_overlay,
+		&intra_unsigned
+	))
+	{
+		XCloseDisplay(display);
+		error(1, 0, "Error, unable to find a good port.\n");
+	}
+
+	if (is_overlay)
+	{
+		Atom xv_colorkey = XInternAtom(display, "XV_COLORKEY", 0);
+		XvGetPortAttribute(display, port_num, xv_colorkey, &colorkey);
+	}
+
+	root = XDefaultRootWindow(display);
+	window = XCreateSimpleWindow(display, root, 0, 0, config.output_width, config.output_height, 0, 0, colorkey);
+
+	CheckStatus(XvMCCreateContext(display, port_num, surface_type_id, config.input_width, config.input_height, XVMC_DIRECT, &context), "XvMCCreateContext");
+	CheckStatus(XvMCCreateSurface(display, &context, &surface), "XvMCCreateSurface");
+	CheckStatus(XvMCCreateBlocks(display, &context, mbw * mbh * BLOCKS_PER_MACROBLOCK, &block_array), "XvMCCreateBlocks");
+	CheckStatus(XvMCCreateMacroBlocks(display, &context, mbw * mbh, &mb_array), "XvMCCreateMacroBlocks");
+
+	for (mby = 0; mby < mbh; ++mby)
+		for (mbx = 0; mbx < mbw; ++mbx)
+		{
+			mb_array.macro_blocks[mby * mbw + mbx].x = mbx;
+			mb_array.macro_blocks[mby * mbw + mbx].y = mby;
+			mb_array.macro_blocks[mby * mbw + mbx].macroblock_type = XVMC_MB_TYPE_INTRA;
+			/* motion_type, motion_vertical_field_select and PMV are not set here: every macroblock is generated as intra. */
+			mb_array.macro_blocks[mby * mbw + mbx].dct_type = XVMC_DCT_TYPE_FRAME;
+			mb_array.macro_blocks[mby * mbw + mbx].index = (mby * mbw + mbx) * BLOCKS_PER_MACROBLOCK;
+			mb_array.macro_blocks[mby * mbw + mbx].coded_block_pattern = 0x3F;
+		}
+
+	XSelectInput(display, window, ExposureMask | KeyPressMask);
+	XMapWindow(display, window);
+	XSync(display, 0);
+
+	gettimeofday(&start, NULL);
+
+	for (reps = 0; reps < config.reps; ++reps)
+	{
+		if (config.pipeline & PIPELINE_STEP_MC)
+		{
+			CheckStatus(XvMCRenderSurface(display, &context, XVMC_FRAME_PICTURE, &surface, NULL, NULL, 0, mbw * mbh, 0, &mb_array, &block_array), "XvMCRenderSurface");
+			CheckStatus(XvMCFlushSurface(display, &surface), "XvMCFlushSurface");
+		}
+		if (config.pipeline & PIPELINE_STEP_CSC)
+			CheckStatus(XvMCPutSurface(display, &surface, window, 0, 0, config.input_width, config.input_height, 0, 0, config.output_width, config.output_height, XVMC_FRAME_PICTURE), "XvMCPutSurface");
+	}
+
+	gettimeofday(&stop, NULL);
+	timeval_subtract(&diff, &stop, &start);
+	diff_secs = (double)diff.tv_sec + (double)diff.tv_usec / 1000000.0;
+
+	printf("XvMC Benchmark\n");
+	printf("Input: %u,%u\nOutput: %u,%u\n", config.input_width, config.input_height, config.output_width, config.output_height);
+	printf("Pipeline: ");
+	if (config.pipeline & PIPELINE_STEP_MC)
+		printf("|mc|");
+	if (config.pipeline & PIPELINE_STEP_CSC)
+		printf("|csc|");
+	if (config.pipeline & PIPELINE_STEP_SWAP)
+		printf("|swap|");
+	printf("\n");
+	printf("Reps: %u\n", config.reps);
+	printf("Total time: %.2lf (%.2lf reps / sec)\n", diff_secs, config.reps / diff_secs);
+
+	CheckStatus(XvMCDestroyBlocks(display, &block_array), "XvMCDestroyBlocks");
+	CheckStatus(XvMCDestroyMacroBlocks(display, &mb_array), "XvMCDestroyMacroBlocks");
+	CheckStatus(XvMCDestroySurface(display, &surface), "XvMCDestroySurface");
+	CheckStatus(XvMCDestroyContext(display, &context), "XvMCDestroyContext");
+
+	XvUngrabPort(display, port_num, CurrentTime);
+	XDestroyWindow(display, window);
+	XCloseDisplay(display);
+
+	return 0;
+}
-- 
cgit v1.2.3


From ea3a01ae4d2117b733c0d415ce5bc69015984d30 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 29 May 2010 19:22:14 -0400
Subject: vl: Get nouveau building again.

Still some DRI2 bits to sort out.
---
 configure.ac                                     | 23 ++++++--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  2 +-
 src/gallium/drivers/nvfx/Makefile                |  7 +--
 src/gallium/drivers/nvfx/nvfx_screen.c           |  2 +
 src/gallium/drivers/nvfx/nvfx_video_context.c    | 48 +++++++++++++++++
 src/gallium/drivers/nvfx/nvfx_video_context.h    | 38 ++++++++++++++
 src/gallium/drivers/softpipe/sp_video_context.c  |  3 +-
 src/gallium/targets/Makefile.xvmc                |  2 +-
 src/gallium/targets/xvmc-nouveau/Makefile        | 23 ++++++++
 src/gallium/winsys/g3dvl/dri/dri_winsys.c        | 67 +++++++++++++++---------
 10 files changed, 182 insertions(+), 33 deletions(-)
 create mode 100644 src/gallium/drivers/nvfx/nvfx_video_context.c
 create mode 100644 src/gallium/drivers/nvfx/nvfx_video_context.h
 create mode 100644 src/gallium/targets/xvmc-nouveau/Makefile

diff --git a/configure.ac b/configure.ac
index 7062817ea1c..757bc1e8e78 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1255,6 +1255,13 @@ yes)
             # mesa/es is required to build es state tracker
             CORE_DIRS="$CORE_DIRS mesa/es"
             ;;
+        xorg/xvmc)
+            # Check for libXvMC?
+            if test "x$enable_gallium_g3dvl" != xyes; then
+                AC_MSG_ERROR([cannot build XvMC state tracker without --enable-gallium-g3dvl])
+            fi
+            HAVE_ST_XVMC="yes"
+            ;;
         esac
     done
     GALLIUM_STATE_TRACKERS_DIRS="$state_trackers"
@@ -1358,7 +1365,7 @@ dnl
 dnl Gallium helper functions
 dnl
 gallium_check_st() {
-    if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_EGL" = xyes || test "x$HAVE_ST_XORG" = xyes; then
+    if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_EGL" = xyes || test "x$HAVE_ST_XORG" = xyes || test "x$HAVE_ST_XVMC" = xyes; then
          GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS $1"
     fi
     if test "x$HAVE_ST_DRI" = xyes && test "x$2" != x; then
@@ -1370,6 +1377,9 @@ gallium_check_st() {
     if test "x$HAVE_ST_XORG" = xyes && test "x$4" != x; then
          GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $4"
     fi
+    if test "x$HAVE_ST_XVMC" = xyes && test "x$5" != x; then
+         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $5"
+    fi
 }
 
 
@@ -1430,7 +1440,7 @@ AC_ARG_ENABLE([gallium-nouveau],
     [enable_gallium_nouveau=no])
 if test "x$enable_gallium_nouveau" = xyes; then
     GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50"
-    gallium_check_st "nouveau/drm" "dri-nouveau" "egl-nouveau" "xorg-nouveau"
+    gallium_check_st "nouveau/drm" "dri-nouveau" "egl-nouveau" "xorg-nouveau" "xvmc-nouveau"
 fi
 
 dnl
@@ -1442,7 +1452,14 @@ AC_ARG_ENABLE([gallium-g3dvl],
     [enable_gallium_g3dvl="$enableval"],
     [enable_gallium_g3dvl=no])
 if test "x$enable_gallium_g3dvl" = xyes; then
-    GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS xvmc-softpipe"
+    case "$mesa_driver" in
+    xlib)
+        GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS xvmc-softpipe"
+        ;;
+    dri)
+        GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS g3dvl/dri"
+        ;;
+    esac
 fi
 
 dnl
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index bafe4861fa5..fa7028b5d3b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -385,7 +385,7 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
          0, 0, 0,
          r->textures.all[i]->width0,
          r->textures.all[i]->height0,
-         0
+         1
       };
 
       r->tex_transfer[i] = r->pipe->get_transfer
diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile
index c1d57ca3969..e7ca6e6cb57 100644
--- a/src/gallium/drivers/nvfx/Makefile
+++ b/src/gallium/drivers/nvfx/Makefile
@@ -19,17 +19,18 @@ C_SOURCES = \
 	nvfx_screen.c \
 	nvfx_state.c \
 	nvfx_state_blend.c \
-        nvfx_state_emit.c \
+	nvfx_state_emit.c \
 	nvfx_state_fb.c \
 	nvfx_state_rasterizer.c \
 	nvfx_state_scissor.c \
-        nvfx_state_stipple.c \
+	nvfx_state_stipple.c \
 	nvfx_state_viewport.c \
 	nvfx_state_zsa.c \
 	nvfx_surface.c \
 	nvfx_transfer.c \
 	nvfx_vbo.c \
-	nvfx_vertprog.c
+	nvfx_vertprog.c \
+	nvfx_video_context.c
 
 LIBRARY_INCLUDES = \
 	-I$(TOP)/src/gallium/drivers/nouveau/include
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 9f03ab1833b..a4d5c61b7aa 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -5,6 +5,7 @@
 #include "nouveau/nouveau_screen.h"
 
 #include "nvfx_context.h"
+#include "nvfx_video_context.h"
 #include "nvfx_screen.h"
 #include "nvfx_resource.h"
 
@@ -341,6 +342,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	pscreen->get_paramf = nvfx_screen_get_paramf;
 	pscreen->is_format_supported = nvfx_screen_surface_format_supported;
 	pscreen->context_create = nvfx_create;
+	pscreen->video_context_create = nvfx_video_create;
 
 	switch (dev->chipset & 0xf0) {
 	case 0x30:
diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.c b/src/gallium/drivers/nvfx/nvfx_video_context.c
new file mode 100644
index 00000000000..5b85b29e29b
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_video_context.c
@@ -0,0 +1,48 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "nvfx_video_context.h"
+#include <softpipe/sp_video_context.h>
+
+struct pipe_video_context *
+nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height, void *priv)
+{
+   struct pipe_context *pipe;
+   /* Create a pipe_context on this screen and let sp_video_create_ex() build the video context around it. */
+   assert(screen);
+
+   pipe = screen->context_create(screen, priv);
+   if (!pipe)
+      return NULL; /* context creation failed; nothing to clean up yet */
+
+   return sp_video_create_ex(pipe, profile, chroma_format, width, height,
+                             VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
+                             VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
+                             true);
+}
diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.h b/src/gallium/drivers/nvfx/nvfx_video_context.h
new file mode 100644
index 00000000000..6619427cc29
--- /dev/null
+++ b/src/gallium/drivers/nvfx/nvfx_video_context.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef __NVFX_VIDEO_CONTEXT_H__
+#define __NVFX_VIDEO_CONTEXT_H__
+
+#include <pipe/p_video_context.h>
+
+struct pipe_video_context *
+nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height, void *priv);
+
+#endif
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 7024e653e3b..fcee9b66823 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -74,7 +74,8 @@ sp_mpeg12_get_param(struct pipe_video_context *vpipe, int param)
 #endif
          return FALSE;
       case PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT:
-         return PIPE_FORMAT_AYUV;
+         //return PIPE_FORMAT_AYUV;
+         return PIPE_FORMAT_VUYA;
       default:
       {
          debug_printf("Softpipe: Unknown PIPE_CAP %d\n", param);
diff --git a/src/gallium/targets/Makefile.xvmc b/src/gallium/targets/Makefile.xvmc
index 53044342317..e48906a1345 100644
--- a/src/gallium/targets/Makefile.xvmc
+++ b/src/gallium/targets/Makefile.xvmc
@@ -30,7 +30,7 @@ OBJECTS = $(C_SOURCES:.c=.o) \
 
 default: depend symlinks $(TOP)/$(LIB_DIR)/gallium/$(LIBNAME)
 
-$(TOP)/$(LIB_DIR)/gallium/$(LIBNAME): $(OBJECTS) $(PIPE_DRIVERS) $(STATE_TRACKER-LIB) $(TOP)/$(LIB_DIR)/gallium Makefile
+$(TOP)/$(LIB_DIR)/gallium/$(LIBNAME): $(OBJECTS) $(PIPE_DRIVERS) $(STATE_TRACKER_LIB) $(TOP)/$(LIB_DIR)/gallium Makefile
 	$(MKLIB) -o $(LIBBASENAME) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
 		-major $(XVMC_MAJOR) -minor $(XVMC_MINOR) $(MKLIB_OPTIONS) \
 		-install $(TOP)/$(LIB_DIR)/gallium \
diff --git a/src/gallium/targets/xvmc-nouveau/Makefile b/src/gallium/targets/xvmc-nouveau/Makefile
new file mode 100644
index 00000000000..045dbcbf97b
--- /dev/null
+++ b/src/gallium/targets/xvmc-nouveau/Makefile
@@ -0,0 +1,23 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+#LIBNAME =
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
+	$(TOP)/src/gallium/winsys/nouveau/drm/libnouveaudrm.a \
+	$(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
+	$(TOP)/src/gallium/drivers/nv50/libnv50.a \
+	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+	$(TOP)/src/gallium/drivers/nouveau/libnouveau.a \
+	$(TOP)/src/gallium/auxiliary/libgallium.a
+
+C_SOURCES = \
+	$(COMMON_GALLIUM_SOURCES) \
+	$(DRIVER_SOURCES)
+
+DRIVER_LIBS = $(shell pkg-config libdrm_nouveau --libs)
+
+include ../Makefile.xvmc
+
+symlinks:
diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index b86f546dd61..892f93df470 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -279,7 +279,7 @@ vl_dri2_get_front(struct vl_dri_screen *vl_dri_scrn, Drawable drawable)
    unsigned int attachments[1] = {DRI_BUFFER_FRONT_LEFT};
    int count;
    DRI2Buffer *dri2_front;
-   struct pipe_resource template, *front_tex;
+   struct pipe_resource *front_tex;
    struct pipe_surface *front_surf = NULL;
 
    assert(vl_dri_scrn);
@@ -293,6 +293,19 @@ vl_dri2_get_front(struct vl_dri_screen *vl_dri_scrn, Drawable drawable)
          .handle = dri2_front->name,
          .stride = dri2_front->pitch
       };
+      struct pipe_resource template;
+
+      memset(&template, 0, sizeof(struct pipe_resource));
+      template.target = PIPE_TEXTURE_2D;
+      template.format = vl_dri_scrn->base.format;
+      template.last_level = 0;
+      template.width0 = w;
+      template.height0 = h;
+      template.depth0 = 1;
+      template.usage = PIPE_USAGE_STATIC;
+      template.bind = PIPE_BIND_RENDER_TARGET;
+      template.flags = 0;
+
       front_tex = vl_dri_scrn->base.pscreen->resource_from_handle(vl_dri_scrn->base.pscreen, &template, &dri2_front_handle);
       if (front_tex)
          front_surf = vl_dri_scrn->base.pscreen->get_tex_surface(vl_dri_scrn->base.pscreen,
@@ -326,33 +339,31 @@ vl_dri2_flush_frontbuffer(struct pipe_screen *screen,
    //st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, fence);
 }
 
+/* XXX: Kill with fire */
+struct vl_dri_context *_vl_dri_ctx = NULL;
 
-Drawable
-vl_video_bind_drawable(struct vl_context *vctx, Drawable drawable)
+void*
+vl_displaytarget_get(struct vl_screen *vscreen, Drawable drawable,
+                     unsigned *width, unsigned *height)
 {
-   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)vctx;
-   struct vl_dri_screen *vl_dri_scrn;
-   dri_drawable_t *dri_drawable;
-   Drawable old_drawable = None;
-
-   assert(vctx);
-
-   if (vl_dri_ctx->drawable)
-      old_drawable = vl_dri_ctx->drawable->x_drawable;
+   struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vscreen;
 
-   if (drawable != old_drawable) {
-      vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
-      if (vl_dri_scrn->dri2) {
-         /* XXX: Need dri2CreateDrawable()? */
-         vl_dri_ctx->dri2_front = vl_dri2_get_front(vl_dri_scrn, drawable);
-      }
-      else {
-         driCreateDrawable(vl_dri_scrn->dri_screen, drawable, &dri_drawable);
-         vl_dri_ctx->drawable = dri_drawable;
+   assert(vscreen);
+   assert(width);
+   assert(height);
+
+   if (vl_dri_scrn->dri2 && _vl_dri_ctx) {
+      if (!_vl_dri_ctx->dri2_front) {
+         _vl_dri_ctx->dri2_front = vl_dri2_get_front((struct vl_dri_screen*)vscreen, drawable);
+         if (!_vl_dri_ctx->dri2_front)
+            return NULL;
+         *width = _vl_dri_ctx->dri2_front->width;
+         *height = _vl_dri_ctx->dri2_front->height;
       }
+      return _vl_dri_ctx;
    }
-
-   return old_drawable;
+   else
+      return NULL;
 }
 
 struct vl_screen*
@@ -416,8 +427,12 @@ vl_screen_create(Display *display, int screen)
       vl_dri_scrn->base.format = vl_dri_scrn->api_hooks->front_srf_locked(vl_dri_scrn->base.pscreen)->format;
       vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri_flush_frontbuffer;
    }
-   else
+   else {
+      /* XXX: Fuuuuu... Can't possibly get this right with current code.
+       * Need to rethink this in st/xvmc and winsys dri/xlib winsyses */
+      vl_dri_scrn->base.format = PIPE_FORMAT_B8G8R8X8_UNORM;
       vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri2_flush_frontbuffer;
+   }
 
    return &vl_dri_scrn->base;
 }
@@ -476,6 +491,8 @@ vl_video_create(struct vl_screen *vscreen,
    vl_dri_ctx->fd = vl_dri_scrn->dri_screen->fd;
    if (!vl_dri_scrn->dri2)
       vl_dri_ctx->lock = (drmLock*)&vl_dri_scrn->dri_screen->sarea->lock;
+   else
+      _vl_dri_ctx = vl_dri_ctx;
 
    return &vl_dri_ctx->base;
 }
@@ -487,6 +504,8 @@ void vl_video_destroy(struct vl_context *vctx)
    assert(vctx);
 
    vl_dri_ctx->base.vpipe->destroy(vl_dri_ctx->base.vpipe);
+   if (vl_dri_ctx->dri2_front)
+      pipe_surface_reference(&vl_dri_ctx->dri2_front, NULL);
    if (!((struct vl_dri_screen *)vctx->vscreen)->dri2)
       driDestroyContext(vl_dri_ctx->dri_context);
    FREE(vl_dri_ctx);
-- 
cgit v1.2.3


From 156fbb9fc530cd2a1d57dc516f67e720ce7f2238 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Mon, 31 May 2010 00:35:20 -0400
Subject: vl: Check mo_type not mb_type when setting motion vectors.

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index fa7028b5d3b..e9024e4a409 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -886,7 +886,7 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
             }
          }
 
-         if (mb->mb_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
             for (i = 0; i < 24 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
                vb[i].y = mo_vec[0].y;
-- 
cgit v1.2.3


From 6414952efe3b53fd33d73d592da74975a1075330 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 6 Jun 2010 11:48:47 -0400
Subject: vl: Drop DRI1, clean up DRI2 bits.

---
 src/gallium/drivers/nvfx/nvfx_video_context.c   |   3 +-
 src/gallium/drivers/softpipe/sp_video_context.c |  18 +-
 src/gallium/drivers/softpipe/sp_video_context.h |   7 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c  |  26 +-
 src/gallium/targets/xvmc-nouveau/Makefile       |   2 +-
 src/gallium/winsys/g3dvl/dri/dri_winsys.c       | 443 ++++++------------------
 src/gallium/winsys/g3dvl/dri/driclient.c        |  19 +
 src/gallium/winsys/g3dvl/dri/driclient.h        |   1 +
 src/gallium/winsys/g3dvl/vl_winsys.h            |   9 +-
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c      | 115 ++++--
 10 files changed, 252 insertions(+), 391 deletions(-)

diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.c b/src/gallium/drivers/nvfx/nvfx_video_context.c
index 5b85b29e29b..4e21f35f40d 100644
--- a/src/gallium/drivers/nvfx/nvfx_video_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_video_context.c
@@ -44,5 +44,6 @@ nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
    return sp_video_create_ex(pipe, profile, chroma_format, width, height,
                              VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
                              VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
-                             true);
+                             true,
+                             PIPE_FORMAT_VUYA);
 }
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index fcee9b66823..9aec8a8c4fe 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -74,8 +74,7 @@ sp_mpeg12_get_param(struct pipe_video_context *vpipe, int param)
 #endif
          return FALSE;
       case PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT:
-         //return PIPE_FORMAT_AYUV;
-         return PIPE_FORMAT_VUYA;
+         return ctx->decode_format;
       default:
       {
          debug_printf("Softpipe: Unknown PIPE_CAP %d\n", param);
@@ -322,7 +321,8 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
                  unsigned width, unsigned height,
                  enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
                  enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
-                 bool pot_buffers)
+                 bool pot_buffers,
+                 enum pipe_format decode_format)
 {
    struct sp_mpeg12_context *ctx;
 
@@ -352,6 +352,7 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
    ctx->base.set_csc_matrix = sp_mpeg12_set_csc_matrix;
 
    ctx->pipe = pipe;
+   ctx->decode_format = decode_format;
 
    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
                                    width, height, chroma_format,
@@ -382,7 +383,7 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
 struct pipe_video_context *
 sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                 enum pipe_video_chroma_format chroma_format,
-                unsigned width, unsigned height)
+                unsigned width, unsigned height, void *priv)
 {
    struct pipe_context *pipe;
 
@@ -400,7 +401,8 @@ sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                              width, height,
                              VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
                              VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
-                             true);
+                             true,
+                             PIPE_FORMAT_AYUV);
 }
 
 struct pipe_video_context *
@@ -409,7 +411,8 @@ sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
                    unsigned width, unsigned height,
                    enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
                    enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
-                   bool pot_buffers)
+                   bool pot_buffers,
+                   enum pipe_format decode_format)
 {
    assert(pipe);
    assert(width && height);
@@ -420,7 +423,8 @@ sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
                                  chroma_format,
                                  width, height,
                                  bufmode, eb_handling,
-                                 pot_buffers);
+                                 pot_buffers,
+                                 decode_format);
       default:
          return NULL;
    }
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
index bc5daa05ac6..0fe48d7a872 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.h
+++ b/src/gallium/drivers/softpipe/sp_video_context.h
@@ -46,12 +46,14 @@ struct sp_mpeg12_context
    void *rast;
    void *dsa;
    void *blend;
+
+   enum pipe_format decode_format;
 };
 
 struct pipe_video_context *
 sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                 enum pipe_video_chroma_format chroma_format,
-                unsigned width, unsigned height);
+                unsigned width, unsigned height, void *priv);
 
 /* Other drivers can call this function in their pipe_video_context constructors and pass it
    an accelerated pipe_context along with suitable buffering modes, etc */
@@ -61,6 +63,7 @@ sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
                    unsigned width, unsigned height,
                    enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
                    enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
-                   bool pot_buffers);
+                   bool pot_buffers,
+                   enum pipe_format decode_format);
 
 #endif /* SP_VIDEO_CONTEXT_H */
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 985cc1a97a6..ea1f648e7fc 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -94,6 +94,7 @@ static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, int xvmc_
    return -1;
 }
 
+#if 0
 static bool
 CreateOrResizeBackBuffer(struct vl_context *vctx, unsigned int width, unsigned int height,
                          struct pipe_surface **backbuffer)
@@ -141,6 +142,7 @@ CreateOrResizeBackBuffer(struct vl_context *vctx, unsigned int width, unsigned i
 
    return true;
 }
+#endif
 
 static void
 MacroBlocksToPipe(struct pipe_screen *screen,
@@ -280,7 +282,6 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    XvMCSurfacePrivate *past_surface_priv;
    XvMCSurfacePrivate *future_surface_priv;
    struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
-   unsigned int i;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p.\n", target_surface);
 
@@ -331,7 +332,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
 
    vpipe->set_decode_target(vpipe, t_vsfc);
    vpipe->decode_macroblocks(vpipe, p_vsfc, f_vsfc, num_macroblocks,
-                             &pipe_macroblocks->base, target_surface_priv->render_fence);
+                             &pipe_macroblocks->base, &target_surface_priv->render_fence);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface);
 
@@ -373,8 +374,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    XvMCContext *context;
    struct pipe_video_rect src_rect = {srcx, srcy, srcw, srch};
    struct pipe_video_rect dst_rect = {destx, desty, destw, desth};
-   void *displaytarget;
-   unsigned width, height;
+   struct pipe_surface *drawable_surface;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Displaying surface %p.\n", surface);
 
@@ -387,8 +387,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    context = surface_priv->context;
    context_priv = context->privData;
 
-   displaytarget = vl_displaytarget_get(context_priv->vctx->vscreen, drawable, &width, &height);
-   if (!displaytarget)
+   drawable_surface = vl_drawable_surface_get(context_priv->vctx->vscreen, drawable);
+   if (!drawable_surface)
       return BadDrawable;
 
    assert(flags == XVMC_TOP_FIELD || flags == XVMC_BOTTOM_FIELD || flags == XVMC_FRAME_PICTURE);
@@ -402,15 +402,17 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
     * until the app updates destw and desth.
     */
    /*
-   assert(destx + destw - 1 < width);
-   assert(desty + desth - 1 < height);
+   assert(destx + destw - 1 < drawable_surface->width);
+   assert(desty + desth - 1 < drawable_surface->height);
     */
 
    subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;
    vpipe = context_priv->vctx->vpipe;
 
+#if 0
    if (!CreateOrResizeBackBuffer(context_priv->vctx, width, height, &context_priv->backbuffer))
       return BadAlloc;
+#endif
 
    if (subpicture_priv) {
       struct pipe_video_rect src_rect = {surface_priv->subx, surface_priv->suby, surface_priv->subw, surface_priv->subh};
@@ -430,17 +432,19 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
       vpipe->set_picture_layers(vpipe, NULL, NULL, NULL, 0);
 
    vpipe->render_picture(vpipe, surface_priv->pipe_vsfc, PictureToPipe(flags), &src_rect,
-                         context_priv->backbuffer, &dst_rect, surface_priv->disp_fence);
+                         drawable_surface, &dst_rect, surface_priv->disp_fence);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
 
    vpipe->screen->flush_frontbuffer
    (
       vpipe->screen,
-      context_priv->backbuffer,
-      displaytarget
+      drawable_surface,
+      vl_contextprivate_get(context_priv->vctx, drawable_surface)
    );
 
+   pipe_surface_reference(&drawable_surface, NULL);
+
    XVMC_MSG(XVMC_TRACE, "[XvMC] Pushed surface %p to front buffer.\n", surface);
 
    return Success;
diff --git a/src/gallium/targets/xvmc-nouveau/Makefile b/src/gallium/targets/xvmc-nouveau/Makefile
index 045dbcbf97b..fe418b07681 100644
--- a/src/gallium/targets/xvmc-nouveau/Makefile
+++ b/src/gallium/targets/xvmc-nouveau/Makefile
@@ -16,7 +16,7 @@ C_SOURCES = \
 	$(COMMON_GALLIUM_SOURCES) \
 	$(DRIVER_SOURCES)
 
-DRIVER_LIBS = $(shell pkg-config libdrm_nouveau --libs)
+DRIVER_LIBS = $(shell pkg-config libdrm_nouveau --libs) -lXfixes
 
 include ../Makefile.xvmc
 
diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index 892f93df470..06631840f78 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -27,251 +27,29 @@
 
 #include <vl_winsys.h>
 #include <driclient.h>
-#include <state_tracker/dri1_api.h>
 #include <pipe/p_video_context.h>
 #include <pipe/p_state.h>
 #include <util/u_memory.h>
+#include <util/u_hash.h>
+#include <util/u_hash_table.h>
+#include <state_tracker/drm_api.h>
+#include <X11/Xlibint.h>
 
 struct vl_dri_screen
 {
    struct vl_screen base;
-   Visual *visual;
    struct drm_api *api;
    dri_screen_t *dri_screen;
-   dri_framebuffer_t dri_framebuf;
-   struct dri1_api *api_hooks;
-   boolean dri2;
+   struct util_hash_table *drawable_table; /* drawables already passed to dri2CreateDrawable(); key and value are both the XID */
+   Drawable last_seen_drawable; /* drawable from the latest vl_drawable_surface_get(); also the one vl_dri2_flush_frontbuffer() copies */
 };
 
 struct vl_dri_context
 {
    struct vl_context base;
-   boolean is_locked;
-   boolean lost_lock;
-   drmLock *lock;
-   dri_context_t *dri_context;
    int fd;
-   struct pipe_video_context *vpipe;
-   dri_drawable_t *drawable;
-   struct pipe_surface *dri2_front;
 };
 
-static void
-vl_dri_lock(void *priv)
-{
-   struct vl_dri_context *vl_dri_ctx = priv;
-   drm_context_t hw_context;
-   char ret = 0;
-
-   assert(priv);
-
-   hw_context = vl_dri_ctx->dri_context->drm_context;
-
-   DRM_CAS(vl_dri_ctx->lock, hw_context, DRM_LOCK_HELD | hw_context, ret);
-   if (ret) {
-      drmGetLock(vl_dri_ctx->fd, hw_context, 0);
-      vl_dri_ctx->lost_lock = TRUE;
-   }
-   vl_dri_ctx->is_locked = TRUE;
-}
-
-static void
-vl_dri_unlock(void *priv)
-{
-   struct vl_dri_context *vl_dri_ctx = priv;
-   drm_context_t hw_context;
-
-   assert(priv);
-
-   hw_context = vl_dri_ctx->dri_context->drm_context;
-
-   vl_dri_ctx->is_locked = FALSE;
-   DRM_UNLOCK(vl_dri_ctx->fd, vl_dri_ctx->lock, hw_context);
-}
-
-static boolean
-vl_dri_is_locked(void *priv)
-{
-   struct vl_dri_context *vl_dri_ctx = priv;
-
-   assert(priv);
-
-   return vl_dri_ctx->is_locked;
-}
-
-static boolean
-vl_dri_lost_lock(void *priv)
-{
-   struct vl_dri_context *vl_dri_ctx = priv;
-
-   assert(priv);
-
-   return vl_dri_ctx->lost_lock;
-}
-
-static void
-vl_dri_clear_lost_lock(void *priv)
-{
-   struct vl_dri_context *vl_dri_ctx = priv;
-
-   assert(priv);
-
-   vl_dri_ctx->lost_lock = FALSE;
-}
-
-struct dri1_api_lock_funcs dri1_lf =
-{
-   .lock = vl_dri_lock,
-   .unlock = vl_dri_unlock,
-   .is_locked = vl_dri_is_locked,
-   .is_lock_lost = vl_dri_lost_lock,
-   .clear_lost_lock = vl_dri_clear_lost_lock
-};
-
-static void
-vl_dri_copy_version(struct dri1_api_version *dst, dri_version_t *src)
-{
-   assert(src);
-   assert(dst);
-   dst->major = src->major;
-   dst->minor = src->minor;
-   dst->patch_level = src->patch;
-}
-
-static boolean
-vl_dri_intersect_src_bbox(struct drm_clip_rect *dst, int dst_x, int dst_y,
-                          const struct drm_clip_rect *src, const struct drm_clip_rect *bbox)
-{
-   int xy1;
-   int xy2;
-
-   assert(dst);
-   assert(src);
-   assert(bbox);
-
-   xy1 = ((int)src->x1 > (int)bbox->x1 + dst_x) ? src->x1 :
-      (int)bbox->x1 + dst_x;
-   xy2 = ((int)src->x2 < (int)bbox->x2 + dst_x) ? src->x2 :
-      (int)bbox->x2 + dst_x;
-   if (xy1 >= xy2 || xy1 < 0)
-      return FALSE;
-
-   dst->x1 = xy1;
-   dst->x2 = xy2;
-
-   xy1 = ((int)src->y1 > (int)bbox->y1 + dst_y) ? src->y1 :
-      (int)bbox->y1 + dst_y;
-   xy2 = ((int)src->y2 < (int)bbox->y2 + dst_y) ? src->y2 :
-      (int)bbox->y2 + dst_y;
-   if (xy1 >= xy2 || xy1 < 0)
-      return FALSE;
-
-   dst->y1 = xy1;
-   dst->y2 = xy2;
-   return TRUE;
-}
-
-static void
-vl_clip_copy(struct vl_dri_context *vl_dri_ctx,
-             struct pipe_surface *dst,
-             struct pipe_surface *src,
-             const struct drm_clip_rect *src_bbox)
-{
-   struct pipe_video_context *vpipe;
-   struct drm_clip_rect clip;
-   struct drm_clip_rect *cur;
-   int i;
-
-   assert(vl_dri_ctx);
-   assert(dst);
-   assert(src);
-   assert(src_bbox);
-
-   vpipe = vl_dri_ctx->base.vpipe;
-
-   assert(vl_dri_ctx->drawable->cliprects);
-   assert(vl_dri_ctx->drawable->num_cliprects > 0);
-
-   cur = vl_dri_ctx->drawable->cliprects;
-
-   for (i = 0; i < vl_dri_ctx->drawable->num_cliprects; ++i) {
-      if (vl_dri_intersect_src_bbox(&clip, vl_dri_ctx->drawable->x, vl_dri_ctx->drawable->y, cur++, src_bbox))
-         vpipe->surface_copy
-         (
-            vpipe, dst, clip.x1, clip.y1, src,
-            (int)clip.x1 - vl_dri_ctx->drawable->x,
-            (int)clip.y1 - vl_dri_ctx->drawable->y,
-            clip.x2 - clip.x1, clip.y2 - clip.y1
-         );
-   }
-}
-
-static void
-vl_dri_update_drawables_locked(struct vl_dri_context *vl_dri_ctx)
-{
-   struct vl_dri_screen *vl_dri_scrn;
-
-   assert(vl_dri_ctx);
-
-   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
-
-   if (vl_dri_ctx->lost_lock) {
-      vl_dri_ctx->lost_lock = FALSE;
-      DRI_VALIDATE_DRAWABLE_INFO(vl_dri_scrn->dri_screen, vl_dri_ctx->drawable);
-   }
-}
-
-static void
-vl_dri_flush_frontbuffer(struct pipe_screen *screen,
-                         struct pipe_surface *surf, void *context_private)
-{
-   struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)context_private;
-   struct vl_dri_screen *vl_dri_scrn;
-   struct drm_clip_rect src_bbox;
-   boolean save_lost_lock = FALSE;
-
-   assert(screen);
-   assert(surf);
-   assert(context_private);
-
-   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
-
-   vl_dri_lock(vl_dri_ctx);
-
-   save_lost_lock = vl_dri_ctx->lost_lock;
-
-   vl_dri_update_drawables_locked(vl_dri_ctx);
-
-   if (vl_dri_ctx->drawable->cliprects) {
-      src_bbox.x1 = 0;
-      src_bbox.x2 = vl_dri_ctx->drawable->w;
-      src_bbox.y1 = 0;
-      src_bbox.y2 = vl_dri_ctx->drawable->h;
-
-#if 0
-      if (vl_dri_scrn->_api_hooks->present_locked)
-         vl_dri_scrn->api_hooks->present_locked(pipe, surf,
-                                                vl_dri_ctx->drawable->cliprects,
-                                                vl_dri_ctx->drawable->num_cliprects,
-                                                vl_dri_ctx->drawable->x, vl_dri_drawable->y,
-                                                &bbox, NULL /*fence*/);
-      else
-#endif
-      if (vl_dri_scrn->api_hooks->front_srf_locked) {
-         struct pipe_surface *front = vl_dri_scrn->api_hooks->front_srf_locked(screen);
-
-         if (front)
-            vl_clip_copy(vl_dri_ctx, front, surf, &src_bbox);
-
-         //st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, fence);
-      }
-   }
-
-   vl_dri_ctx->lost_lock = save_lost_lock;
-
-   vl_dri_unlock(vl_dri_ctx);
-}
-
 static struct pipe_surface*
 vl_dri2_get_front(struct vl_dri_screen *vl_dri_scrn, Drawable drawable)
 {
@@ -286,6 +64,9 @@ vl_dri2_get_front(struct vl_dri_screen *vl_dri_scrn, Drawable drawable)
 
    dri2_front = DRI2GetBuffers(vl_dri_scrn->dri_screen->display,
                                drawable, &w, &h, attachments, 1, &count);
+
+   assert(count == 1);
+
    if (dri2_front) {
       struct winsys_handle dri2_front_handle =
       {
@@ -297,7 +78,7 @@ vl_dri2_get_front(struct vl_dri_screen *vl_dri_scrn, Drawable drawable)
 
       memset(&template, 0, sizeof(struct pipe_resource));
       template.target = PIPE_TEXTURE_2D;
-      template.format = vl_dri_scrn->base.format;
+      template.format = PIPE_FORMAT_B8G8R8X8_UNORM;
       template.last_level = 0;
       template.width0 = w;
       template.height0 = h;
@@ -310,8 +91,9 @@ vl_dri2_get_front(struct vl_dri_screen *vl_dri_scrn, Drawable drawable)
       if (front_tex)
          front_surf = vl_dri_scrn->base.pscreen->get_tex_surface(vl_dri_scrn->base.pscreen,
                                                                  front_tex, 0, 0, 0,
-                                                                 /*PIPE_BIND_RENDER_TARGET*/ PIPE_BIND_BLIT_DESTINATION);
+                                                                 PIPE_BIND_RENDER_TARGET);
       pipe_resource_reference(&front_tex, NULL);
+      Xfree(dri2_front);
    }
 
    return front_surf;
@@ -322,119 +104,121 @@ vl_dri2_flush_frontbuffer(struct pipe_screen *screen,
                           struct pipe_surface *surf, void *context_private)
 {
    struct vl_dri_context *vl_dri_ctx = (struct vl_dri_context*)context_private;
-   struct vl_dri_screen *vl_dri_scrn;
-   struct pipe_video_context *vpipe;
+   struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
 
    assert(screen);
    assert(surf);
    assert(context_private);
-   assert(vl_dri_ctx->dri2_front);
-
-   vl_dri_scrn = (struct vl_dri_screen*)vl_dri_ctx->base.vscreen;
-   vpipe = vl_dri_ctx->base.vpipe;
 
-   /* XXX: Why not just render to fake front? */
-   vpipe->surface_copy(vpipe, vl_dri_ctx->dri2_front, 0, 0, surf, 0, 0, surf->width, surf->height);
-
-   //st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, fence);
+   dri2CopyDrawable(vl_dri_scrn->dri_screen, vl_dri_scrn->last_seen_drawable,
+                    DRI_BUFFER_FRONT_LEFT, DRI_BUFFER_FAKE_FRONT_LEFT);
 }
 
-/* XXX: Kill with fire */
-struct vl_dri_context *_vl_dri_ctx = NULL;
-
-void*
-vl_displaytarget_get(struct vl_screen *vscreen, Drawable drawable,
-                     unsigned *width, unsigned *height)
+struct pipe_surface*
+vl_drawable_surface_get(struct vl_screen *vscreen, Drawable drawable)
 {
    struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vscreen;
 
    assert(vscreen);
-   assert(width);
-   assert(height);
-
-   if (vl_dri_scrn->dri2 && _vl_dri_ctx) {
-      if (!_vl_dri_ctx->dri2_front) {
-         _vl_dri_ctx->dri2_front = vl_dri2_get_front((struct vl_dri_screen*)vscreen, drawable);
-         if (!_vl_dri_ctx->dri2_front)
-            return NULL;
-         *width = _vl_dri_ctx->dri2_front->width;
-         *height = _vl_dri_ctx->dri2_front->height;
+   /* Register the drawable with DRI2 the first time it is seen; last_seen_drawable skips the table lookup for repeats. */
+   if (vl_dri_scrn->last_seen_drawable != drawable) {
+      /* A missing key comes back from the table as NULL and is compared against None below, so the two must be equal */
+      assert(None == NULL);
+      Drawable lookup_drawable = (Drawable)util_hash_table_get(vl_dri_scrn->drawable_table, (void*)drawable);
+      if (lookup_drawable == None) {
+         dri2CreateDrawable(vl_dri_scrn->dri_screen, drawable);
+         util_hash_table_set(vl_dri_scrn->drawable_table, (void*)drawable, (void*)drawable);
       }
-      return _vl_dri_ctx;
+      vl_dri_scrn->last_seen_drawable = drawable;
    }
-   else
-      return NULL;
+   /* The returned surface is built from the drawable's DRI2 buffers on every call (nothing is cached here). */
+   return vl_dri2_get_front(vl_dri_scrn, drawable);
+}
+
+void*
+vl_contextprivate_get(struct vl_context *vctx, struct pipe_surface *displaytarget)
+{
+   return vctx; /* the vl_context itself is the flush_frontbuffer private pointer; displaytarget is unused here */
+}
+
+static unsigned drawable_hash(void *key)
+{
+   Drawable xid = (Drawable)key; /* the key pointer carries the XID value itself */
+   assert(xid != None);
+   return util_hash_crc32(&xid, sizeof xid); /* hash the raw bytes of the XID */
+}
+
+static int drawable_cmp(void *key1, void *key2)
+{
+   Drawable lhs = (Drawable)key1; /* keys carry XIDs by value, see drawable_hash() */
+   Drawable rhs = (Drawable)key2;
+   assert(lhs != None);
+   assert(rhs != None);
+   return (lhs == rhs) ? 0 : 1; /* 0 when the XIDs match, 1 otherwise */
+}
+
+static enum pipe_error
+drawable_destroy(void *key, void *value, void *data)
+{
+   struct vl_dri_screen *scrn = (struct vl_dri_screen*)data;
+   Drawable xid = (Drawable)key;
+   /* util_hash_table_foreach() callback: key is the XID, data is the owning vl_dri_screen. */
+   assert(xid != None);
+   assert(value);
+   assert(data);
+   /* Undo the dri2CreateDrawable() issued when the drawable was first registered. */
+   dri2DestroyDrawable(scrn->dri_screen, xid);
+
+   return PIPE_OK;
 }
 
 struct vl_screen*
 vl_screen_create(Display *display, int screen)
 {
    struct vl_dri_screen *vl_dri_scrn;
-   struct dri1_create_screen_arg arg;
+   struct drm_create_screen_arg arg;
 
    assert(display);
 
    vl_dri_scrn = CALLOC_STRUCT(vl_dri_screen);
    if (!vl_dri_scrn)
-      return NULL;
-
-   /* Try DRI2 first */
-   if (dri2CreateScreen(display, screen, &vl_dri_scrn->dri_screen)) {
-      /* If not, try DRI */
-      if (driCreateScreen(display, screen, &vl_dri_scrn->dri_screen, &vl_dri_scrn->dri_framebuf)) {
-         /* Now what? */
-         FREE(vl_dri_scrn);
-         return NULL;
-      }
-      else {
-         /* Got DRI */
-         arg.base.mode = DRM_CREATE_DRI1;
-         arg.lf = &dri1_lf;
-         arg.ddx_info = vl_dri_scrn->dri_framebuf.private;
-         arg.ddx_info_size = vl_dri_scrn->dri_framebuf.private_size;
-         arg.sarea = vl_dri_scrn->dri_screen->sarea;
-         vl_dri_copy_version(&arg.ddx_version, &vl_dri_scrn->dri_screen->ddx);
-         vl_dri_copy_version(&arg.dri_version, &vl_dri_scrn->dri_screen->dri);
-         vl_dri_copy_version(&arg.drm_version, &vl_dri_scrn->dri_screen->drm);
-         arg.api = NULL;
-         vl_dri_scrn->dri2 = FALSE;
-      }
-   }
-   else {
-      /* Got DRI2 */
-      arg.base.mode = DRM_CREATE_NORMAL;
-      vl_dri_scrn->dri2 = TRUE;
-   }
+      goto no_struct;
+   /* Only DRI2 is attempted; a non-zero return from dri2CreateScreen() is treated as failure. */
+   if (dri2CreateScreen(display, screen, &vl_dri_scrn->dri_screen))
+      goto no_dri2screen;
 
    vl_dri_scrn->api = drm_api_create();
-   if (!vl_dri_scrn->api) {
-      FREE(vl_dri_scrn);
-      return NULL;
-   }
+   if (!vl_dri_scrn->api)
+      goto no_drmapi;
+   /* mode is the only field of arg that is filled in before create_screen(). */
+   arg.mode = DRM_CREATE_NORMAL;
 
    vl_dri_scrn->base.pscreen = vl_dri_scrn->api->create_screen(vl_dri_scrn->api,
                                                                vl_dri_scrn->dri_screen->fd,
-                                                               &arg.base);
+                                                               &arg);
 
-   if (!vl_dri_scrn->base.pscreen) {
-      FREE(vl_dri_scrn);
-      return NULL;
-   }
+   if (!vl_dri_scrn->base.pscreen)
+      goto no_pscreen;
 
-   if (!vl_dri_scrn->dri2) {
-      vl_dri_scrn->visual = XDefaultVisual(display, screen);
-      vl_dri_scrn->api_hooks = arg.api;
-      vl_dri_scrn->base.format = vl_dri_scrn->api_hooks->front_srf_locked(vl_dri_scrn->base.pscreen)->format;
-      vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri_flush_frontbuffer;
-   }
-   else {
-      /* XXX: Fuuuuu... Can't possibly get this right with current code.
-       * Need to rethink this in st/xvmc and winsys dri/xlib winsyses */
-      vl_dri_scrn->base.format = PIPE_FORMAT_B8G8R8X8_UNORM;
-      vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri2_flush_frontbuffer;
-   }
+   vl_dri_scrn->drawable_table = util_hash_table_create(&drawable_hash, &drawable_cmp);
+   if (!vl_dri_scrn->drawable_table)
+      goto no_hash;
+   /* None marks "no drawable seen yet" for vl_drawable_surface_get(). */
+   vl_dri_scrn->last_seen_drawable = None;
+   vl_dri_scrn->base.pscreen->flush_frontbuffer = vl_dri2_flush_frontbuffer;
 
    return &vl_dri_scrn->base;
+   /* Failure unwinding: each label releases what was acquired before the failing step. */
+no_hash:
+   vl_dri_scrn->base.pscreen->destroy(vl_dri_scrn->base.pscreen);
+no_pscreen:
+   if (vl_dri_scrn->api->destroy) vl_dri_scrn->api->destroy(vl_dri_scrn->api); /* same NULL guard as vl_screen_destroy() */
+no_drmapi:
+   dri2DestroyScreen(vl_dri_scrn->dri_screen);
+no_dri2screen:
+   FREE(vl_dri_scrn);
+no_struct:
+   return NULL;
 }
 
 void vl_screen_destroy(struct vl_screen *vscreen)
@@ -443,11 +227,12 @@ void vl_screen_destroy(struct vl_screen *vscreen)
 
    assert(vscreen);
 
+   util_hash_table_foreach(vl_dri_scrn->drawable_table, drawable_destroy, vl_dri_scrn);
+   util_hash_table_destroy(vl_dri_scrn->drawable_table);
    vl_dri_scrn->base.pscreen->destroy(vl_dri_scrn->base.pscreen);
-   if (vl_dri_scrn->dri2)
-      dri2DestroyScreen(vl_dri_scrn->dri_screen);
-   else
-      driDestroyScreen(vl_dri_scrn->dri_screen);
+   if (vl_dri_scrn->api->destroy)
+      vl_dri_scrn->api->destroy(vl_dri_scrn->api);
+   dri2DestroyScreen(vl_dri_scrn->dri_screen);
    FREE(vl_dri_scrn);
 }
 
@@ -462,39 +247,33 @@ vl_video_create(struct vl_screen *vscreen,
 
    vl_dri_ctx = CALLOC_STRUCT(vl_dri_context);
    if (!vl_dri_ctx)
-      return NULL;
-
-   /* XXX: Is default visual correct/sufficient here? */
-   if (!vl_dri_scrn->dri2)
-      driCreateContext(vl_dri_scrn->dri_screen, vl_dri_scrn->visual, &vl_dri_ctx->dri_context);
+      goto no_struct;
 
    if (!vscreen->pscreen->video_context_create) {
       debug_printf("[G3DVL] No video support found on %s/%s.\n",
                    vscreen->pscreen->get_vendor(vscreen->pscreen),
                    vscreen->pscreen->get_name(vscreen->pscreen));
-      FREE(vl_dri_ctx);
-      return NULL;
+      goto no_vpipe;
    }
 
    vl_dri_ctx->base.vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen,
                                                                    profile, chroma_format,
                                                                    width, height,
-                                                                   vl_dri_ctx->dri_context);
+                                                                   vl_dri_ctx);
 
-   if (!vl_dri_ctx->base.vpipe) {
-      FREE(vl_dri_ctx);
-      return NULL;
-   }
+   if (!vl_dri_ctx->base.vpipe)
+      goto no_vpipe;
 
    vl_dri_ctx->base.vpipe->priv = vl_dri_ctx;
    vl_dri_ctx->base.vscreen = vscreen;
    vl_dri_ctx->fd = vl_dri_scrn->dri_screen->fd;
-   if (!vl_dri_scrn->dri2)
-      vl_dri_ctx->lock = (drmLock*)&vl_dri_scrn->dri_screen->sarea->lock;
-   else
-      _vl_dri_ctx = vl_dri_ctx;
 
    return &vl_dri_ctx->base;
+
+no_vpipe:
+   FREE(vl_dri_ctx);
+no_struct:
+   return NULL;
 }
 
 void vl_video_destroy(struct vl_context *vctx)
@@ -504,9 +283,5 @@ void vl_video_destroy(struct vl_context *vctx)
    assert(vctx);
 
    vl_dri_ctx->base.vpipe->destroy(vl_dri_ctx->base.vpipe);
-   if (vl_dri_ctx->dri2_front)
-      pipe_surface_reference(&vl_dri_ctx->dri2_front, NULL);
-   if (!((struct vl_dri_screen *)vctx->vscreen)->dri2)
-      driDestroyContext(vl_dri_ctx->dri_context);
    FREE(vl_dri_ctx);
 }
diff --git a/src/gallium/winsys/g3dvl/dri/driclient.c b/src/gallium/winsys/g3dvl/dri/driclient.c
index 7a2469c0d66..90e48a7cb28 100644
--- a/src/gallium/winsys/g3dvl/dri/driclient.c
+++ b/src/gallium/winsys/g3dvl/dri/driclient.c
@@ -353,17 +353,36 @@ free_screen:
 int dri2DestroyScreen(dri_screen_t *dri_screen)
 {
 	/* Not much to do here apparently... */
+	assert(dri_screen);
+	free(dri_screen); /* releases the dri_screen_t itself; the caller's pointer is dangling afterwards */
 	return 0;
 }
 
 int dri2CreateDrawable(dri_screen_t *dri_screen, XID drawable)
 {
+	assert(dri_screen);
 	DRI2CreateDrawable(dri_screen->display, drawable);
 	return 0;
 }
 
 int dri2DestroyDrawable(dri_screen_t *dri_screen, XID drawable)
 {
+	assert(dri_screen);
 	DRI2DestroyDrawable(dri_screen->display, drawable);
 	return 0;
 }
+
+int dri2CopyDrawable(dri_screen_t *dri_screen, XID drawable, int dest, int src)
+{
+	XserverRegion bounds;
+	/* dest and src are DRI_BUFFER_* attachment indices; the range is only checked by the asserts. */
+	assert(dri_screen);
+	assert(DRI_BUFFER_FRONT_LEFT <= dest && dest <= DRI_BUFFER_DEPTH_STENCIL);
+	assert(DRI_BUFFER_FRONT_LEFT <= src && src <= DRI_BUFFER_DEPTH_STENCIL);
+	/* The drawable is treated as a window; its bounding region exists only for the duration of the copy. */
+	bounds = XFixesCreateRegionFromWindow(dri_screen->display, drawable, WindowRegionBounding);
+	DRI2CopyRegion(dri_screen->display, drawable, bounds, dest, src);
+	XFixesDestroyRegion(dri_screen->display, bounds);
+
+	return 0;
+}
diff --git a/src/gallium/winsys/g3dvl/dri/driclient.h b/src/gallium/winsys/g3dvl/dri/driclient.h
index c71b6c2c831..4e4fd362395 100644
--- a/src/gallium/winsys/g3dvl/dri/driclient.h
+++ b/src/gallium/winsys/g3dvl/dri/driclient.h
@@ -100,6 +100,7 @@ int dri2CreateScreen(Display *display, int screen, dri_screen_t **dri_screen);
 int dri2DestroyScreen(dri_screen_t *dri_screen);
 int dri2CreateDrawable(dri_screen_t *dri_screen, XID drawable);
 int dri2DestroyDrawable(dri_screen_t *dri_screen, XID drawable);
+int dri2CopyDrawable(dri_screen_t *dri_screen, XID drawable, int dest, int src);
 
 #define DRI_BUFFER_FRONT_LEFT		0
 #define DRI_BUFFER_BACK_LEFT		1
diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h
index c75ff9f32f5..381478637a8 100644
--- a/src/gallium/winsys/g3dvl/vl_winsys.h
+++ b/src/gallium/winsys/g3dvl/vl_winsys.h
@@ -34,11 +34,10 @@
 
 struct pipe_screen;
 struct pipe_video_context;
+struct pipe_surface;
 
 struct vl_screen
 {
-   Display *display;
-   enum pipe_format format;
    struct pipe_screen *pscreen;
 };
 
@@ -61,8 +60,10 @@ vl_video_create(struct vl_screen *vscreen,
 
 void vl_video_destroy(struct vl_context *vctx);
 
+struct pipe_surface*
+vl_drawable_surface_get(struct vl_screen *vscreen, Drawable drawable);
+
 void*
-vl_displaytarget_get(struct vl_screen *vscreen, Drawable drawable,
-                     unsigned *width, unsigned *height);
+vl_contextprivate_get(struct vl_context *vctx, struct pipe_surface *drawable_surface);
 
 #endif
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 95c2af1e73c..0a7f324a77c 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -26,81 +26,136 @@
  **************************************************************************/
 
 #include <vl_winsys.h>
+#include <X11/Xlibint.h>
 #include <state_tracker/xlib_sw_winsys.h>
 #include <util/u_memory.h>
+#include <util/u_format.h>
 #include <softpipe/sp_public.h>
 #include <softpipe/sp_video_context.h>
 
-/* TODO: Find a good way to calculate this */
-static enum pipe_format VisualToPipe(Visual *visual)
+struct vl_xsp_screen
 {
-   assert(visual);
-   return PIPE_FORMAT_B8G8R8X8_UNORM;
-}
-
-/* XXX: Not thread-safe */
-static struct xlib_drawable xdraw;
-
-void*
-vl_displaytarget_get(struct vl_screen *vscreen, Drawable drawable,
-                     unsigned *width_out, unsigned *height_out)
+   struct vl_screen base; /* must stay first: vl_screen pointers are cast to and from this struct */
+   Display *display; /* X connection given to vl_screen_create(); used for XGetGeometry() */
+   int screen; /* X screen number given to vl_screen_create() */
+   Visual visual; /* NOTE(review): not referenced in the visible code; xdraw.visual is what gets set */
+   struct xlib_drawable xdraw; /* returned by vl_contextprivate_get() as the flush_frontbuffer private data */
+   struct pipe_surface *drawable_surface; /* cached surface, rebuilt when the drawable's size changes */
+};
+
+struct pipe_surface*
+vl_drawable_surface_get(struct vl_screen *vscreen, Drawable drawable)
 {
+   struct vl_xsp_screen *xsp_screen = (struct vl_xsp_screen*)vscreen;
    Window root;
    int x, y;
    unsigned int width, height;
    unsigned int border_width;
    unsigned int depth;
+   struct pipe_resource templat, *drawable_tex;
+   struct pipe_surface *drawable_surface = NULL;
 
    assert(vscreen);
+   assert(drawable != None);
+   /* XGetGeometry() returns a Status: zero on failure, never an X error code such as BadDrawable. */
+   if (!XGetGeometry(xsp_screen->display, drawable, &root, &x, &y, &width, &height, &border_width, &depth))
+      return NULL;
+
+   xsp_screen->xdraw.drawable = drawable;
+   /* Reuse the cached surface while the drawable keeps its size; otherwise drop it and rebuild below. */
+   if (xsp_screen->drawable_surface) {
+      if (xsp_screen->drawable_surface->width == width &&
+          xsp_screen->drawable_surface->height == height) {
+         pipe_surface_reference(&drawable_surface, xsp_screen->drawable_surface);
+         return drawable_surface;
+      }
+      else
+         pipe_surface_reference(&xsp_screen->drawable_surface, NULL);
+   }
+   /* Describe a single-level 2D texture with the drawable's current width and height. */
+   memset(&templat, 0, sizeof(struct pipe_resource));
+   templat.target = PIPE_TEXTURE_2D;
+   /* XXX: Need to figure out drawable's format */
+   templat.format = PIPE_FORMAT_B8G8R8X8_UNORM;
+   templat.last_level = 0;
+   templat.width0 = width;
+   templat.height0 = height;
+   templat.depth0 = 1;
+   templat.usage = PIPE_USAGE_DEFAULT;
+   templat.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_BLIT_SOURCE;
+   templat.flags = 0;
+
+   drawable_tex = vscreen->pscreen->resource_create(vscreen->pscreen, &templat);
+   if (!drawable_tex)
+      return NULL;
+   /* The local texture reference is dropped right after; presumably the surface holds its own. */
+   xsp_screen->drawable_surface = vscreen->pscreen->get_tex_surface(vscreen->pscreen, drawable_tex,
+                                                                    0, 0, 0,
+                                                                    templat.bind);
+   pipe_resource_reference(&drawable_tex, NULL);
 
-   if (XGetGeometry(vscreen->display, drawable, &root, &x, &y, &width, &height, &border_width, &depth) == BadDrawable)
+   if (!xsp_screen->drawable_surface)
       return NULL;
 
-   if (width_out) *width_out = width;
-   if (height_out) *height_out = height;
+   pipe_surface_reference(&drawable_surface, xsp_screen->drawable_surface);
+   /* The caller receives its own reference; the screen keeps the cached one. */
+   xsp_screen->xdraw.depth = 24/*util_format_get_blocksizebits(templat.format) /
+                             util_format_get_blockwidth(templat.format)*/;
+
+   return drawable_surface;
+}
+
+void*
+vl_contextprivate_get(struct vl_context *vctx, struct pipe_surface *drawable_surface)
+{
+   struct vl_xsp_screen *xsp_screen; /* resolved only after vctx has been checked, so the assert can fire first */
 
-   xdraw.depth = depth;
-   xdraw.drawable = drawable;
+   assert(vctx && drawable_surface);
+   xsp_screen = (struct vl_xsp_screen*)vctx->vscreen;
+   assert(xsp_screen->drawable_surface == drawable_surface); /* must be the surface currently cached by the screen */
 
-   return &xdraw;
+   return &xsp_screen->xdraw; /* xlib_drawable whose drawable/depth were set by vl_drawable_surface_get() */
 }
 
 struct vl_screen*
 vl_screen_create(Display *display, int screen)
 {
-   struct vl_screen *vscreen;
+   struct vl_xsp_screen *xsp_screen;
    struct sw_winsys *winsys;
 
    assert(display);
 
-   vscreen = CALLOC_STRUCT(vl_screen);
-   if (!vscreen)
+   xsp_screen = CALLOC_STRUCT(vl_xsp_screen); /* presumably zero-filled, so drawable_surface starts out NULL */
+   if (!xsp_screen)
       return NULL;
 
    winsys = xlib_create_sw_winsys(display);
    if (!winsys) {
-      FREE(vscreen);
+      FREE(xsp_screen);
       return NULL;
    }
 
-   vscreen->pscreen = softpipe_create_screen(winsys);
-   if (!vscreen->pscreen) {
+   xsp_screen->base.pscreen = softpipe_create_screen(winsys);
+   if (!xsp_screen->base.pscreen) {
       winsys->destroy(winsys);
-      FREE(vscreen);
+      FREE(xsp_screen);
       return NULL;
    }
 
-   vscreen->display = display;
-   xdraw.visual = XDefaultVisual(display, screen);
-   vscreen->format = VisualToPipe(xdraw.visual);
+   xsp_screen->display = display; /* kept for the XGetGeometry() call in vl_drawable_surface_get() */
+   xsp_screen->screen = screen;
+   xsp_screen->xdraw.visual = XDefaultVisual(display, screen); /* xdraw.drawable and xdraw.depth are filled in later */
 
-   return vscreen;
+   return &xsp_screen->base; /* callers only ever see the embedded vl_screen */
 }
 
 void vl_screen_destroy(struct vl_screen *vscreen)
 {
+   struct vl_xsp_screen *xsp_screen = (struct vl_xsp_screen*)vscreen; /* vscreen is the base member of a vl_xsp_screen */
+
    assert(vscreen);
 
+   pipe_surface_reference(&xsp_screen->drawable_surface, NULL); /* drop the cached drawable surface before the pipe screen is destroyed */
    vscreen->pscreen->destroy(vscreen->pscreen);
    FREE(vscreen);
 }
@@ -142,8 +197,6 @@ void vl_video_destroy(struct vl_context *vctx)
 {
    assert(vctx);
 
-#if 1
    vctx->vpipe->destroy(vctx->vpipe);
-#endif
    FREE(vctx);
 }
-- 
cgit v1.2.3


From 511cb3fbf9352540dd667aa6b3bb7e24c93a4ce6 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 6 Jun 2010 13:31:53 -0400
Subject: vl: Decode to XRGB, not ARGB.

---
 src/gallium/drivers/nvfx/nvfx_video_context.c   | 2 +-
 src/gallium/drivers/softpipe/sp_video_context.c | 2 +-
 src/gallium/include/pipe/p_format.h             | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.c b/src/gallium/drivers/nvfx/nvfx_video_context.c
index 4e21f35f40d..9212ae57fc0 100644
--- a/src/gallium/drivers/nvfx/nvfx_video_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_video_context.c
@@ -45,5 +45,5 @@ nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                              VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
                              VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
                              true,
-                             PIPE_FORMAT_VUYA);
+                             PIPE_FORMAT_VUYX);
 }
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 9aec8a8c4fe..9d3a1ab5f5b 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -402,7 +402,7 @@ sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                              VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
                              VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
                              true,
-                             PIPE_FORMAT_AYUV);
+                             PIPE_FORMAT_XYUV);
 }
 
 struct pipe_video_context *
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 70a768c4e16..5ca27b3db28 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -195,6 +195,8 @@ enum pipe_format {
    PIPE_FORMAT_NV21                  = 140,
    PIPE_FORMAT_AYUV                  = PIPE_FORMAT_A8R8G8B8_UNORM,
    PIPE_FORMAT_VUYA                  = PIPE_FORMAT_B8G8R8A8_UNORM,
+   PIPE_FORMAT_XYUV                  = PIPE_FORMAT_X8R8G8B8_UNORM,
+   PIPE_FORMAT_VUYX                  = PIPE_FORMAT_B8G8R8X8_UNORM,
    PIPE_FORMAT_IA44                  = 141,
    PIPE_FORMAT_AI44                  = 142,
 
@@ -207,6 +209,7 @@ enum pipe_video_chroma_format
    PIPE_VIDEO_CHROMA_FORMAT_422,
    PIPE_VIDEO_CHROMA_FORMAT_444
 };
+
 #ifdef __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From 4b2fcb2bcb7a93cf3dc8cd164f4e87b5c538f7f6 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 13 Jun 2010 17:36:34 -0400
Subject: vl: Add transfer funcs to pipe_video_context and softpipe.

---
 src/gallium/drivers/softpipe/sp_video_context.c | 95 +++++++++++++++++++++++++
 src/gallium/include/pipe/p_video_context.h      | 28 ++++++++
 2 files changed, 123 insertions(+)

diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 9d3a1ab5f5b..44df00e0b78 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -161,6 +161,94 @@ sp_mpeg12_surface_copy(struct pipe_video_context *vpipe,
       util_surface_copy(ctx->pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, width, height);
 }
 
+static struct pipe_transfer*
+sp_mpeg12_get_transfer(struct pipe_video_context *vpipe,
+                       struct pipe_resource *resource,
+                       struct pipe_subresource subresource,
+                       unsigned usage,  /* a combination of PIPE_TRANSFER_x */
+                       const struct pipe_box *box)
+{
+   struct sp_mpeg12_context *mpeg12;
+   /* Pass-through: the request is handed unchanged to the wrapped pipe context. */
+   assert(vpipe);
+   assert(resource);
+   assert(box);
+   mpeg12 = (struct sp_mpeg12_context*)vpipe;
+   return mpeg12->pipe->get_transfer(mpeg12->pipe, resource, subresource, usage, box);
+}
+
+static void
+sp_mpeg12_transfer_destroy(struct pipe_video_context *vpipe,
+                           struct pipe_transfer *transfer)
+{
+   struct sp_mpeg12_context *mpeg12;
+   /* The transfer is destroyed by the wrapped pipe context. */
+   assert(vpipe);
+   assert(transfer);
+   mpeg12 = (struct sp_mpeg12_context*)vpipe;
+   mpeg12->pipe->transfer_destroy(mpeg12->pipe, transfer);
+}
+
+static void*
+sp_mpeg12_transfer_map(struct pipe_video_context *vpipe,
+                       struct pipe_transfer *transfer)
+{
+   struct sp_mpeg12_context *mpeg12;
+   /* Returns whatever pointer the wrapped pipe context's transfer_map() yields. */
+   assert(vpipe);
+   assert(transfer);
+   mpeg12 = (struct sp_mpeg12_context*)vpipe;
+   return mpeg12->pipe->transfer_map(mpeg12->pipe, transfer);
+}
+
+static void
+sp_mpeg12_transfer_flush_region(struct pipe_video_context *vpipe,
+                                struct pipe_transfer *transfer,
+                                const struct pipe_box *box)
+{
+   struct sp_mpeg12_context *mpeg12;
+   /* The box is forwarded as-is to the wrapped pipe context. */
+   assert(vpipe);
+   assert(transfer);
+   assert(box);
+   mpeg12 = (struct sp_mpeg12_context*)vpipe;
+   mpeg12->pipe->transfer_flush_region(mpeg12->pipe, transfer, box);
+}
+
+static void
+sp_mpeg12_transfer_unmap(struct pipe_video_context *vpipe,
+                         struct pipe_transfer *transfer)
+{
+   struct sp_mpeg12_context *mpeg12;
+   /* Counterpart of sp_mpeg12_transfer_map(); forwarded to the wrapped pipe context. */
+   assert(vpipe);
+   assert(transfer);
+   mpeg12 = (struct sp_mpeg12_context*)vpipe;
+   mpeg12->pipe->transfer_unmap(mpeg12->pipe, transfer);
+}
+
+static void
+sp_mpeg12_transfer_inline_write(struct pipe_video_context *vpipe,
+                                struct pipe_resource *resource,
+                                struct pipe_subresource subresource,
+                                unsigned usage, /* a combination of PIPE_TRANSFER_x */
+                                const struct pipe_box *box,
+                                const void *data,
+                                unsigned stride,
+                                unsigned slice_stride)
+{
+   struct sp_mpeg12_context *mpeg12;
+   /* sp_mpeg12_create() installs this hook only when the pipe context has transfer_inline_write. */
+   assert(vpipe);
+   assert(resource);
+   assert(box);
+   assert(data);
+   mpeg12 = (struct sp_mpeg12_context*)vpipe;
+   assert(mpeg12->pipe->transfer_inline_write);
+   mpeg12->pipe->transfer_inline_write(mpeg12->pipe, resource, subresource, usage,
+                                       box, data, stride, slice_stride);
+}
+
 static void
 sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
                          struct pipe_surface           *src_surface,
@@ -346,6 +434,13 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
    ctx->base.render_picture = sp_mpeg12_render_picture;
    ctx->base.surface_fill = sp_mpeg12_surface_fill;
    ctx->base.surface_copy = sp_mpeg12_surface_copy;
+   ctx->base.get_transfer = sp_mpeg12_get_transfer;
+   ctx->base.transfer_destroy = sp_mpeg12_transfer_destroy;
+   ctx->base.transfer_map = sp_mpeg12_transfer_map;
+   ctx->base.transfer_flush_region = sp_mpeg12_transfer_flush_region;
+   ctx->base.transfer_unmap = sp_mpeg12_transfer_unmap;
+   if (pipe->transfer_inline_write)
+      ctx->base.transfer_inline_write = sp_mpeg12_transfer_inline_write;
    ctx->base.set_picture_background = sp_mpeg12_set_picture_background;
    ctx->base.set_picture_layers = sp_mpeg12_set_picture_layers;
    ctx->base.set_decode_target = sp_mpeg12_set_decode_target;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index d90b667de6c..294dc464c36 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -114,6 +114,34 @@ struct pipe_video_context
                         unsigned srcx, unsigned srcy,
                         unsigned width, unsigned height);
 
+   struct pipe_transfer *(*get_transfer)(struct pipe_video_context *vpipe,
+                                         struct pipe_resource *resource,
+                                         struct pipe_subresource subresource,
+                                         unsigned usage,  /* a combination of PIPE_TRANSFER_x */
+                                         const struct pipe_box *box);
+
+   void (*transfer_destroy)(struct pipe_video_context *vpipe,
+                            struct pipe_transfer *transfer);
+
+   void* (*transfer_map)(struct pipe_video_context *vpipe,
+                         struct pipe_transfer *transfer);
+
+   void (*transfer_flush_region)(struct pipe_video_context *vpipe,
+                                 struct pipe_transfer *transfer,
+                                 const struct pipe_box *box);
+
+   void (*transfer_unmap)(struct pipe_video_context *vpipe,
+                          struct pipe_transfer *transfer);
+
+   void (*transfer_inline_write)(struct pipe_video_context *vpipe,
+                                 struct pipe_resource *resource,
+                                 struct pipe_subresource subresource,
+                                 unsigned usage, /* a combination of PIPE_TRANSFER_x */
+                                 const struct pipe_box *box,
+                                 const void *data,
+                                 unsigned stride,
+                                 unsigned slice_stride);
+
    /*@}*/
 
    /**
-- 
cgit v1.2.3


From b9fe9665194899fc0a6336d876ab0596418c7287 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 13 Jun 2010 17:37:33 -0400
Subject: vl: Fix RGB subpictures.

---
 src/gallium/auxiliary/vl/vl_compositor.c          | 69 ++++++++++++++++++++---
 src/gallium/auxiliary/vl/vl_compositor.h          |  6 +-
 src/gallium/state_trackers/xorg/xvmc/subpicture.c | 33 +++++------
 src/gallium/state_trackers/xorg/xvmc/surface.c    |  4 +-
 4 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 8203bf79ee0..0640b1a4565 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -98,7 +98,7 @@ create_vert_shader(struct vl_compositor *c)
 }
 
 static bool
-create_frag_shader(struct vl_compositor *c)
+create_frag_shader_ycbcr_2_rgb(struct vl_compositor *c)
 {
    struct ureg_program *shader;
    struct ureg_src tc;
@@ -130,8 +130,37 @@ create_frag_shader(struct vl_compositor *c)
    ureg_release_temporary(shader, texel);
    ureg_END(shader);
 
-   c->fragment_shader = ureg_create_shader_and_destroy(shader, c->pipe);
-   if (!c->fragment_shader)
+   c->fragment_shader.ycbcr_2_rgb = ureg_create_shader_and_destroy(shader, c->pipe);
+   if (!c->fragment_shader.ycbcr_2_rgb)
+      return false;
+
+   return true;
+}
+
+static bool
+create_frag_shader_rgb_2_rgb(struct vl_compositor *c)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc;
+   struct ureg_src sampler;
+   struct ureg_dst fragment;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return false;
+
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 0);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * fragment = tex(tc, sampler)
+    */
+   ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc, sampler);
+   ureg_END(shader);
+
+   c->fragment_shader.rgb_2_rgb = ureg_create_shader_and_destroy(shader, c->pipe);
+   if (!c->fragment_shader.rgb_2_rgb)
       return false;
 
    return true;
@@ -178,8 +207,18 @@ init_shaders(struct vl_compositor *c)
 {
    assert(c);
 
-   create_vert_shader(c);
-   create_frag_shader(c);
+   if (!create_vert_shader(c)) {
+      debug_printf("Unable to create vertex shader.\n");
+      return false;
+   }
+   if (!create_frag_shader_ycbcr_2_rgb(c)) {
+      debug_printf("Unable to create YCbCr-to-RGB fragment shader.\n");
+      return false;
+   }
+   if (!create_frag_shader_rgb_2_rgb(c)) {
+      debug_printf("Unable to create RGB-to-RGB fragment shader.\n");
+      return false;
+   }
 
    return true;
 }
@@ -189,7 +228,8 @@ static void cleanup_shaders(struct vl_compositor *c)
    assert(c);
 
    c->pipe->delete_vs_state(c->pipe, c->vertex_shader);
-   c->pipe->delete_fs_state(c->pipe, c->fragment_shader);
+   c->pipe->delete_fs_state(c->pipe, c->fragment_shader.ycbcr_2_rgb);
+   c->pipe->delete_fs_state(c->pipe, c->fragment_shader.rgb_2_rgb);
 }
 
 static bool
@@ -362,6 +402,9 @@ void vl_compositor_set_layers(struct vl_compositor *compositor,
             compositor->layer_dst_rects[i] = *dst_rects[i];
          compositor->dirty_layers |= 1 << i;
       }
+
+      if (layers[i])
+         compositor->dirty_layers |= 1 << i;
    }
 
    for (; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
@@ -416,7 +459,8 @@ static unsigned gen_data(struct vl_compositor *c,
                          struct pipe_surface *src_surface,
                          struct pipe_video_rect *src_rect,
                          struct pipe_video_rect *dst_rect,
-                         struct pipe_surface **textures)
+                         struct pipe_surface **textures,
+                         void **frag_shaders)
 {
    void *vb;
    struct pipe_transfer *buf_transfer;
@@ -440,6 +484,8 @@ static unsigned gen_data(struct vl_compositor *c,
       struct vertex2f bg_inv_size = {1.0f / c->bg->width, 1.0f / c->bg->height};
       gen_rect_verts(num_rects, &c->bg_src_rect, &bg_inv_size, NULL, NULL, vb);
       textures[num_rects] = c->bg;
+      /* XXX: Hack */
+      frag_shaders[num_rects] = c->fragment_shader.rgb_2_rgb;
       ++num_rects;
       c->dirty_bg = false;
    }
@@ -448,6 +494,8 @@ static unsigned gen_data(struct vl_compositor *c,
       struct vertex2f src_inv_size = { 1.0f / src_surface->width, 1.0f / src_surface->height};
       gen_rect_verts(num_rects, src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
       textures[num_rects] = src_surface;
+      /* XXX: Hack, sort of */
+      frag_shaders[num_rects] = c->fragment_shader.ycbcr_2_rgb;
       ++num_rects;
    }
 
@@ -459,6 +507,8 @@ static unsigned gen_data(struct vl_compositor *c,
          gen_rect_verts(num_rects, &c->layer_src_rects[i], &layer_inv_size,
                         &c->layer_dst_rects[i], &c->fb_inv_size, vb);
          textures[num_rects] = c->layers[i];
+         /* XXX: Hack */
+         frag_shaders[num_rects] = c->fragment_shader.rgb_2_rgb;
          ++num_rects;
          c->dirty_layers &= ~(1 << i);
       }
@@ -476,6 +526,7 @@ static void draw_layers(struct vl_compositor *c,
 {
    unsigned num_rects;
    struct pipe_surface *src_surfaces[VL_COMPOSITOR_MAX_LAYERS + 2];
+   void *frag_shaders[VL_COMPOSITOR_MAX_LAYERS + 2];
    unsigned i;
 
    assert(c);
@@ -483,7 +534,7 @@ static void draw_layers(struct vl_compositor *c,
    assert(src_rect);
    assert(dst_rect);
 
-   num_rects = gen_data(c, src_surface, src_rect, dst_rect, src_surfaces);
+   num_rects = gen_data(c, src_surface, src_rect, dst_rect, src_surfaces, frag_shaders);
 
    for (i = 0; i < num_rects; ++i) {
       boolean delete_view = FALSE;
@@ -502,6 +553,7 @@ static void draw_layers(struct vl_compositor *c,
                                            surface_view, c->pipe);
       }
 
+      c->pipe->bind_fs_state(c->pipe, frag_shaders[i]);
       c->pipe->set_fragment_sampler_views(c->pipe, 1, &surface_view);
       c->pipe->draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
 
@@ -554,7 +606,6 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport);
    compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 1, &compositor->sampler);
    compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
-   compositor->pipe->bind_fs_state(compositor->pipe, compositor->fragment_shader);
    compositor->pipe->set_vertex_buffers(compositor->pipe, 1, &compositor->vertex_buf);
    compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems_state);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 026ae559ed7..820c9ef6ddb 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -47,7 +47,11 @@ struct vl_compositor
    void *sampler;
    struct pipe_sampler_view *sampler_view;
    void *vertex_shader;
-   void *fragment_shader;
+   struct
+   {
+      void *ycbcr_2_rgb;
+      void *rgb_2_rgb;
+   } fragment_shader;
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buf;
    void *vertex_elems_state;
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 59842c90d0d..e0c9e303817 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -234,10 +234,12 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
 {
    XvMCSubpicturePrivate *subpicture_priv;
    XvMCContextPrivate *context_priv;
-   struct pipe_screen *screen;
+   struct pipe_video_context *vpipe;
    struct pipe_transfer *xfer;
-   unsigned char *src, *dst;
+   unsigned char *src, *dst, *dst_line;
    unsigned x, y;
+   struct pipe_box dst_box = {dstx, dsty, 0, width, height, 1};
+   struct pipe_subresource sr = {0, 0};
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Compositing subpicture %p.\n", subpicture);
 
@@ -257,20 +259,19 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
 
    subpicture_priv = subpicture->privData;
    context_priv = subpicture_priv->context->privData;
-   screen = context_priv->vctx->vpipe->screen;
+   vpipe = context_priv->vctx->vpipe;
 
    /* TODO: Assert rects are within bounds? Or clip? */
 
-#if 0
-   xfer = screen->get_tex_transfer(screen, subpicture_priv->sfc->texture, 0, 0, 0,
-                                   PIPE_TRANSFER_WRITE, dstx, dsty, width, height);
+   xfer = vpipe->get_transfer(vpipe, subpicture_priv->sfc->texture,
+                              sr, PIPE_TRANSFER_WRITE, &dst_box);
    if (!xfer)
       return BadAlloc;
 
    src = image->data;
-   dst = screen->transfer_map(screen, xfer);
+   dst = vpipe->transfer_map(vpipe, xfer);
    if (!dst) {
-      screen->tex_transfer_destroy(xfer);
+      vpipe->transfer_destroy(vpipe, xfer);
       return BadAlloc;
    }
 
@@ -278,21 +279,21 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
       case FOURCC_RGB:
          assert(subpicture_priv->sfc->format == XvIDToPipe(image->id));
          for (y = 0; y < height; ++y) {
-            for (x = 0; x < width; ++x, src += 3, dst += 4) {
-               /* TODO: Confirm or fix */
-               dst[0] = src[0];
-               dst[1] = src[1];
-               dst[2] = src[2];
+            dst_line = dst;
+            for (x = 0; x < width; ++x, src += 3, dst_line += 4) {
+               dst_line[0] = src[2]; /* B */
+               dst_line[1] = src[1]; /* G */
+               dst_line[2] = src[0]; /* R */
             }
+            dst += xfer->stride;
          }
          break;
       default:
          XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", image->id);
    }
 
-   screen->transfer_unmap(screen, xfer);
-   screen->tex_transfer_destroy(xfer);
-#endif
+   vpipe->transfer_unmap(vpipe, xfer);
+   vpipe->transfer_destroy(vpipe, xfer);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p composited.\n", subpicture);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index ea1f648e7fc..0decc45a0bb 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -423,7 +423,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
       XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p has subpicture %p.\n", surface, surface_priv->subpicture);
 
       assert(subpicture_priv->surface == surface);
-      vpipe->set_picture_layers(vpipe, &subpicture_priv->sfc, &src_rects, &dst_rects, 1);
+      vpipe->set_picture_layers(vpipe, &subpicture_priv->sfc, src_rects, dst_rects, 1);
 
       surface_priv->subpicture = NULL;
       subpicture_priv->surface = NULL;
@@ -432,7 +432,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
       vpipe->set_picture_layers(vpipe, NULL, NULL, NULL, 0);
 
    vpipe->render_picture(vpipe, surface_priv->pipe_vsfc, PictureToPipe(flags), &src_rect,
-                         drawable_surface, &dst_rect, surface_priv->disp_fence);
+                         drawable_surface, &dst_rect, &surface_priv->disp_fence);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
 
-- 
cgit v1.2.3


From f3e34ba6fba76870b1c91a27adb706d1b87aeec8 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 27 Jun 2010 00:01:18 -0400
Subject: st/vdpau: Initial commit.

Enough plumbing here to get vdpauinfo working.
---
 src/gallium/state_trackers/vdpau/Makefile        |  15 ++
 src/gallium/state_trackers/vdpau/device.c        |  98 +++++++++++++
 src/gallium/state_trackers/vdpau/ftab.c          | 122 ++++++++++++++++
 src/gallium/state_trackers/vdpau/htab.c          |  94 +++++++++++++
 src/gallium/state_trackers/vdpau/query.c         | 171 +++++++++++++++++++++++
 src/gallium/state_trackers/vdpau/vdpau_private.h |  59 ++++++++
 src/gallium/targets/Makefile.vdpau               |  61 ++++++++
 src/gallium/targets/vdpau-softpipe/Makefile      |  19 +++
 8 files changed, 639 insertions(+)
 create mode 100644 src/gallium/state_trackers/vdpau/Makefile
 create mode 100644 src/gallium/state_trackers/vdpau/device.c
 create mode 100644 src/gallium/state_trackers/vdpau/ftab.c
 create mode 100644 src/gallium/state_trackers/vdpau/htab.c
 create mode 100644 src/gallium/state_trackers/vdpau/query.c
 create mode 100644 src/gallium/state_trackers/vdpau/vdpau_private.h
 create mode 100644 src/gallium/targets/Makefile.vdpau
 create mode 100644 src/gallium/targets/vdpau-softpipe/Makefile

diff --git a/src/gallium/state_trackers/vdpau/Makefile b/src/gallium/state_trackers/vdpau/Makefile
new file mode 100644
index 00000000000..346cce9d43b
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/Makefile
@@ -0,0 +1,15 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = vdpautracker
+
+LIBRARY_INCLUDES = \
+	$(shell pkg-config --cflags-only-I vdpau) \
+	-I$(TOP)/src/gallium/winsys/g3dvl
+
+C_SOURCES = htab.c \
+	    ftab.c \
+	    device.c \
+	    query.c
+
+include ../../Makefile.template
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
new file mode 100644
index 00000000000..83fcaff0282
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -0,0 +1,98 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <vdpau/vdpau_x11.h>
+#include <pipe/p_compiler.h>
+#include <vl_winsys.h>
+#include <util/u_memory.h>
+#include "vdpau_private.h"
+
+VdpDeviceCreateX11 vdp_imp_device_create_x11;
+
+PUBLIC VdpStatus
+vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGetProcAddress **get_proc_address)
+{
+   VdpStatus    ret;
+   vlVdpDevice *dev;
+
+   if (!(display && device && get_proc_address))
+      return VDP_STATUS_INVALID_POINTER;
+
+   if (!vlCreateHTAB()) {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_htab;
+   }
+
+   dev = CALLOC(1, sizeof(vlVdpDevice));
+   if (!dev) {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_dev;
+   }
+
+   *device = vlAddDataHTAB(dev);
+   if (*device == 0) {
+      ret = VDP_STATUS_ERROR;
+      goto no_handle;
+   }
+
+   *get_proc_address = &vlVdpGetProcAddress;
+
+   return VDP_STATUS_OK;
+
+no_handle:
+   FREE(dev);
+no_dev:
+   vlDestroyHTAB();
+no_htab:
+   return ret;
+}
+
+VdpStatus vlVdpDeviceDestroy(VdpDevice device)
+{
+   vlVdpDevice *dev = vlGetDataHTAB(device);
+   if (!dev)
+      return VDP_STATUS_INVALID_HANDLE;
+   FREE(dev);
+   vlDestroyHTAB();
+
+   return VDP_STATUS_OK;
+}
+
+VdpStatus vlVdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_pointer)
+{
+   vlVdpDevice *dev = vlGetDataHTAB(device);
+   if (!dev)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   if (!function_pointer)
+      return VDP_STATUS_INVALID_POINTER;
+
+   if (!vlGetFuncFTAB(function_id, function_pointer))
+      return VDP_STATUS_INVALID_FUNC_ID;
+
+   return VDP_STATUS_OK;
+}
diff --git a/src/gallium/state_trackers/vdpau/ftab.c b/src/gallium/state_trackers/vdpau/ftab.c
new file mode 100644
index 00000000000..a8a29857df7
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/ftab.c
@@ -0,0 +1,122 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+#include "vdpau_private.h"
+
+static void* ftab[67] =
+{
+   0, /* VDP_FUNC_ID_GET_ERROR_STRING */
+   0, /* VDP_FUNC_ID_GET_PROC_ADDRESS */
+   &vlVdpGetApiVersion, /* VDP_FUNC_ID_GET_API_VERSION */
+   0,
+   &vlVdpGetInformationString, /* VDP_FUNC_ID_GET_INFORMATION_STRING */
+   &vlVdpDeviceDestroy, /* VDP_FUNC_ID_DEVICE_DESTROY */
+   0, /* VDP_FUNC_ID_GENERATE_CSC_MATRIX */
+   &vlVdpVideoSurfaceQueryCapabilities, /* VDP_FUNC_ID_VIDEO_SURFACE_QUERY_CAPABILITIES */
+   &vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities, /* VDP_FUNC_ID_VIDEO_SURFACE_QUERY_GET_PUT_BITS_Y_CB_CR_CAPABILITIES */
+   0, /* VDP_FUNC_ID_VIDEO_SURFACE_CREATE */
+   0, /* VDP_FUNC_ID_VIDEO_SURFACE_DESTROY */
+   0, /* VDP_FUNC_ID_VIDEO_SURFACE_GET_PARAMETERS */
+   0, /* VDP_FUNC_ID_VIDEO_SURFACE_GET_BITS_Y_CB_CR */
+   0, /* VDP_FUNC_ID_VIDEO_SURFACE_PUT_BITS_Y_CB_CR */
+   &vlVdpOutputSurfaceQueryCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_CAPABILITIES */
+   &vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_GET_PUT_BITS_NATIVE_CAPABILITIES */
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_PUT_BITS_INDEXED_CAPABILITIES */
+   &vlVdpOutputSurfaceQueryPutBitsYCbCrCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_PUT_BITS_Y_CB_CR_CAPABILITIES */
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_CREATE */
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_DESTROY */
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_PARAMETERS */
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_BITS_NATIVE */
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_NATIVE */
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_INDEXED */
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_Y_CB_CR */
+   &vlVdpBitmapSurfaceQueryCapabilities, /* VDP_FUNC_ID_BITMAP_SURFACE_QUERY_CAPABILITIES */
+   0, /* VDP_FUNC_ID_BITMAP_SURFACE_CREATE */
+   0, /* VDP_FUNC_ID_BITMAP_SURFACE_DESTROY */
+   0, /* VDP_FUNC_ID_BITMAP_SURFACE_GET_PARAMETERS */
+   0, /* VDP_FUNC_ID_BITMAP_SURFACE_PUT_BITS_NATIVE */
+   0,
+   0,
+   0,
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_OUTPUT_SURFACE */
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_BITMAP_SURFACE */
+   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_VIDEO_SURFACE_LUMA */
+   &vlVdpDecoderQueryCapabilities, /* VDP_FUNC_ID_DECODER_QUERY_CAPABILITIES */
+   0, /* VDP_FUNC_ID_DECODER_CREATE */
+   0, /* VDP_FUNC_ID_DECODER_DESTROY */
+   0, /* VDP_FUNC_ID_DECODER_GET_PARAMETERS */
+   0, /* VDP_FUNC_ID_DECODER_RENDER */
+   &vlVdpVideoMixerQueryFeatureSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_FEATURE_SUPPORT */
+   &vlVdpVideoMixerQueryParameterSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_PARAMETER_SUPPORT */
+   &vlVdpVideoMixerQueryAttributeSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_ATTRIBUTE_SUPPORT */
+   &vlVdpVideoMixerQueryParameterValueRange, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_PARAMETER_VALUE_RANGE */
+   &vlVdpVideoMixerQueryAttributeValueRange, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_ATTRIBUTE_VALUE_RANGE */
+   0, /* VDP_FUNC_ID_VIDEO_MIXER_CREATE */
+   0, /* VDP_FUNC_ID_VIDEO_MIXER_SET_FEATURE_ENABLES */
+   0, /* VDP_FUNC_ID_VIDEO_MIXER_SET_ATTRIBUTE_VALUES */
+   0, /* VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_SUPPORT */
+   0, /* VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_ENABLES */
+   0, /* VDP_FUNC_ID_VIDEO_MIXER_GET_PARAMETER_VALUES */
+   0, /* VDP_FUNC_ID_VIDEO_MIXER_GET_ATTRIBUTE_VALUES */
+   0, /* VDP_FUNC_ID_VIDEO_MIXER_DESTROY */
+   0, /* VDP_FUNC_ID_VIDEO_MIXER_RENDER */
+   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_DESTROY */
+   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_CREATE */
+   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_DESTROY */
+   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_SET_BACKGROUND_COLOR */
+   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_GET_BACKGROUND_COLOR */
+   0,
+   0,
+   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_GET_TIME */
+   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_DISPLAY */
+   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_BLOCK_UNTIL_SURFACE_IDLE */
+   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_QUERY_SURFACE_STATUS */
+   0  /* VDP_FUNC_ID_PREEMPTION_CALLBACK_REGISTER */
+};
+
+static void* ftab_winsys[1] =
+{
+   0  /* VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_CREATE_X11 */
+};
+
+/* Stores the table entry for function_id in *func; FALSE if the ID is out of range. */
+boolean vlGetFuncFTAB(VdpFuncId function_id, void **func)
+{
+   void **table = ftab;
+   unsigned count = sizeof(ftab) / sizeof(ftab[0]);
+   assert(func);
+   if (function_id >= VDP_FUNC_ID_BASE_WINSYS) { /* winsys IDs index their own table */
+      function_id -= VDP_FUNC_ID_BASE_WINSYS;
+      table = ftab_winsys;
+      count = sizeof(ftab_winsys) / sizeof(ftab_winsys[0]);
+   }
+   if (function_id >= count) /* bound follows the table size, no magic numbers */
+      return FALSE;
+   *func = table[function_id]; /* still 0 for entry points that are not implemented */
+   return TRUE;
+}
diff --git a/src/gallium/state_trackers/vdpau/htab.c b/src/gallium/state_trackers/vdpau/htab.c
new file mode 100644
index 00000000000..0c958055374
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/htab.c
@@ -0,0 +1,94 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <util/u_handle_table.h>
+#include <os/os_thread.h>
+#include "vdpau_private.h"
+
+#ifdef VL_HANDLES
+static struct handle_table *htab = NULL;
+pipe_static_mutex(htab_lock);
+#endif
+
+boolean vlCreateHTAB(void)
+{
+#ifdef VL_HANDLES
+   boolean ret;
+   /* Make sure handle table handles match VDPAU handles. */
+   assert(sizeof(unsigned) <= sizeof(vlHandle));
+   pipe_mutex_lock(htab_lock);
+   if (!htab)
+      htab = handle_table_create();
+   ret = htab != NULL;
+   pipe_mutex_unlock(htab_lock);
+   return ret;
+#else
+   return TRUE;
+#endif
+}
+
+void vlDestroyHTAB(void)
+{
+#ifdef VL_HANDLES
+   pipe_mutex_lock(htab_lock);
+   if (htab) {
+      handle_table_destroy(htab);
+      htab = NULL;
+   }
+   pipe_mutex_unlock(htab_lock);
+#endif
+}
+
+vlHandle vlAddDataHTAB(void *data)
+{
+   assert(data);
+#ifdef VL_HANDLES
+   vlHandle handle = 0;
+   pipe_mutex_lock(htab_lock);
+   if (htab)
+      handle = handle_table_add(htab, data);
+   pipe_mutex_unlock(htab_lock);
+   return handle;
+#else
+   return (vlHandle)data;
+#endif
+}
+
+/* Resolve a client-supplied handle to its object; NULL if the handle is unknown. */
+void* vlGetDataHTAB(vlHandle handle)
+{
+   void *data = NULL;
+#ifdef VL_HANDLES
+   pipe_mutex_lock(htab_lock);
+   if (htab && handle) /* 0 is never a valid handle; don't assert on client input */
+      data = handle_table_get(htab, handle);
+   pipe_mutex_unlock(htab_lock);
+#else
+   data = (void*)handle;
+#endif
+   return data;
+}
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
new file mode 100644
index 00000000000..57bd7fb7526
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -0,0 +1,171 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vdpau_private.h"
+
+VdpStatus
+vlVdpGetApiVersion(uint32_t *api_version)
+{
+   if (!api_version)
+      return VDP_STATUS_INVALID_POINTER;
+
+   *api_version = 1;
+   return VDP_STATUS_OK;
+}
+
+VdpStatus
+vlVdpGetInformationString(char const **information_string)
+{
+   if (!information_string)
+      return VDP_STATUS_INVALID_POINTER;
+
+   *information_string = "VDPAU-G3DVL";
+   return VDP_STATUS_OK;
+}
+
+VdpStatus
+vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chroma_type,
+                                   VdpBool *is_supported, uint32_t *max_width, uint32_t *max_height)
+{
+   if (!(is_supported && max_width && max_height))
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaType surface_chroma_type,
+                                                  VdpYCbCrFormat bits_ycbcr_format,
+                                                  VdpBool *is_supported)
+{
+   if (!is_supported)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
+                              VdpBool *is_supported, uint32_t *max_level, uint32_t *max_macroblocks,
+                              uint32_t *max_width, uint32_t *max_height)
+{
+   if (!(is_supported && max_level && max_macroblocks && max_width && max_height))
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpOutputSurfaceQueryCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
+                                    VdpBool *is_supported, uint32_t *max_width, uint32_t *max_height)
+{
+   if (!(is_supported && max_width && max_height))
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
+                                                    VdpBool *is_supported)
+{
+   if (!is_supported)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpOutputSurfaceQueryPutBitsYCbCrCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
+                                                VdpYCbCrFormat bits_ycbcr_format,
+                                                VdpBool *is_supported)
+{
+   if (!is_supported)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpBitmapSurfaceQueryCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
+                                    VdpBool *is_supported, uint32_t *max_width, uint32_t *max_height)
+{
+   if (!(is_supported && max_width && max_height))
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpVideoMixerQueryFeatureSupport(VdpDevice device, VdpVideoMixerFeature feature,
+                                   VdpBool *is_supported)
+{
+   if (!is_supported)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpVideoMixerQueryParameterSupport(VdpDevice device, VdpVideoMixerParameter parameter,
+                                     VdpBool *is_supported)
+{
+   if (!is_supported)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpVideoMixerQueryParameterValueRange(VdpDevice device, VdpVideoMixerParameter parameter,
+                                        void *min_value, void *max_value)
+{
+   if (!(min_value && max_value))
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpVideoMixerQueryAttributeSupport(VdpDevice device, VdpVideoMixerAttribute attribute,
+                                     VdpBool *is_supported)
+{
+   if (!is_supported)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpVideoMixerQueryAttributeValueRange(VdpDevice device, VdpVideoMixerAttribute attribute,
+                                        void *min_value, void *max_value)
+{
+   if (!(min_value && max_value))
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
new file mode 100644
index 00000000000..8f54ae657ce
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <vdpau/vdpau.h>
+#include <pipe/p_compiler.h>
+
+typedef struct
+{
+   int dummy; /* sole member of the device record at this point in the series */
+} vlVdpDevice;
+
+typedef uint32_t vlHandle;
+
+boolean vlCreateHTAB(void);
+void vlDestroyHTAB(void);
+vlHandle vlAddDataHTAB(void *data);
+void* vlGetDataHTAB(vlHandle handle);
+boolean vlGetFuncFTAB(VdpFuncId function_id, void **func);
+
+VdpDeviceDestroy vlVdpDeviceDestroy;
+VdpGetProcAddress vlVdpGetProcAddress;
+VdpGetApiVersion vlVdpGetApiVersion;
+VdpGetInformationString vlVdpGetInformationString;
+VdpVideoSurfaceQueryCapabilities vlVdpVideoSurfaceQueryCapabilities;
+VdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities;
+VdpDecoderQueryCapabilities vlVdpDecoderQueryCapabilities;
+VdpOutputSurfaceQueryCapabilities vlVdpOutputSurfaceQueryCapabilities;
+VdpOutputSurfaceQueryGetPutBitsNativeCapabilities vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities;
+VdpOutputSurfaceQueryPutBitsYCbCrCapabilities vlVdpOutputSurfaceQueryPutBitsYCbCrCapabilities;
+VdpBitmapSurfaceQueryCapabilities vlVdpBitmapSurfaceQueryCapabilities;
+VdpVideoMixerQueryFeatureSupport vlVdpVideoMixerQueryFeatureSupport;
+VdpVideoMixerQueryParameterSupport vlVdpVideoMixerQueryParameterSupport;
+VdpVideoMixerQueryParameterValueRange vlVdpVideoMixerQueryParameterValueRange;
+VdpVideoMixerQueryAttributeSupport vlVdpVideoMixerQueryAttributeSupport;
+VdpVideoMixerQueryAttributeValueRange vlVdpVideoMixerQueryAttributeValueRange;
diff --git a/src/gallium/targets/Makefile.vdpau b/src/gallium/targets/Makefile.vdpau
new file mode 100644
index 00000000000..e5c3dad7dad
--- /dev/null
+++ b/src/gallium/targets/Makefile.vdpau
@@ -0,0 +1,61 @@
+# This makefile template is used to build libvdpau_g3dvl.so
+
+LIBBASENAME = vdpau_g3dvl
+LIBNAME = lib$(LIBBASENAME).so
+VDPAU_MAJOR = 1
+VDPAU_MINOR = 0
+INCLUDES = -I$(TOP)/src/gallium/include \
+	   -I$(TOP)/src/gallium/drivers \
+	   -I$(TOP)/src/gallium/auxiliary \
+	   -I$(TOP)/src/gallium/winsys/g3dvl \
+	   $(DRIVER_INCLUDES)
+DEFINES = -DGALLIUM_TRACE $(DRIVER_DEFINES)
+LIBS = $(EXTRA_LIB_PATH) $(DRIVER_LIBS) -lvdpau -lXext -lX11 -lm
+STATE_TRACKER_LIB = $(TOP)/src/gallium/state_trackers/vdpau/libvdpautracker.a
+
+# XXX: Hack, VDPAU public funcs aren't exported if we link to libvdpautracker.a :(
+OBJECTS = $(C_SOURCES:.c=.o) \
+	  $(ASM_SOURCES:.S=.o) \
+	  $(TOP)/src/gallium/state_trackers/vdpau/*.o
+
+##### RULES #####
+
+.c.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+.S.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+##### TARGETS #####
+
+default: depend symlinks $(TOP)/$(LIB_DIR)/gallium/$(LIBNAME)
+
+$(TOP)/$(LIB_DIR)/gallium/$(LIBNAME): $(OBJECTS) $(PIPE_DRIVERS) $(STATE_TRACKER_LIB) $(TOP)/$(LIB_DIR)/gallium Makefile
+	$(MKLIB) -o $(LIBBASENAME) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+		-major $(VDPAU_MAJOR) -minor $(VDPAU_MINOR) $(MKLIB_OPTIONS) \
+		-install $(TOP)/$(LIB_DIR)/gallium \
+		$(OBJECTS) $(STATE_TRACKER_LIB) $(PIPE_DRIVERS) $(LIBS)
+
+$(TOP)/$(LIB_DIR)/gallium:
+	mkdir -p $@
+
+depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
+	rm -f depend
+	touch depend
+	$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDES) $(C_SOURCES) \
+		$(ASM_SOURCES) 2> /dev/null
+
+# Emacs tags
+tags:
+	etags `find . -name \*.[ch]` `find ../include`
+
+# Remove .o and backup files
+clean:
+	-rm -f *.o *~ *.so $(SYMLINKS)
+	-rm -f depend depend.bak
+
+#install: $(LIBNAME)
+#	$(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+#	$(MINSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+
+include depend
diff --git a/src/gallium/targets/vdpau-softpipe/Makefile b/src/gallium/targets/vdpau-softpipe/Makefile
new file mode 100644
index 00000000000..29dea50e7fb
--- /dev/null
+++ b/src/gallium/targets/vdpau-softpipe/Makefile
@@ -0,0 +1,19 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+DRIVER_DEFINES = -DGALLIUM_SOFTPIPE
+DRIVER_INCLUDES =
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a \
+	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+	$(TOP)/src/gallium/auxiliary/libgallium.a
+
+C_SOURCES = \
+	$(TOP)/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+
+DRIVER_LIBS =
+
+include ../Makefile.vdpau
+
+symlinks:
-- 
cgit v1.2.3


From 15bc635499589dc3490e5bdc198dfd376ce6fb1f Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Wed, 14 Jul 2010 00:30:46 +0200
Subject: added surface.c and made some changes in device.c

---
 src/gallium/state_trackers/vdpau/surface.c | 161 +++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)
 create mode 100644 src/gallium/state_trackers/vdpau/surface.c

diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
new file mode 100644
index 00000000000..1f481098ede
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -0,0 +1,161 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vdpau_private.h"
+#include <pipe/p_screen.h>
+#include <pipe/p_state.h>
+#include <util/u_memory.h>
+
+/*
+ * Create a video surface on `device` and store its new handle in *surface.
+ * Returns VDP_STATUS_INVALID_POINTER or VDP_STATUS_INVALID_SIZE for bad
+ * arguments, VDP_STATUS_INVALID_HANDLE for an unknown device,
+ * VDP_STATUS_RESOURCES when an allocation or the screen creation fails and
+ * VDP_STATUS_ERROR when no handle could be issued for the surface.
+ */
+VdpStatus
+vlVdpVideoSurfaceCreate(VdpDevice device,
+                        VdpChromaType chroma_type,
+                        uint32_t width,
+                        uint32_t height,
+                        VdpVideoSurface *surface)
+{
+   vlVdpSurface *p_surf;
+   vlVdpDevice *dev;
+   VdpStatus ret;
+
+   if (!surface)
+      return VDP_STATUS_INVALID_POINTER;
+   if (!(width && height))
+      return VDP_STATUS_INVALID_SIZE;
+   if (!vlCreateHTAB())
+      return VDP_STATUS_RESOURCES;
+
+   /* One zero-initialized object of the pointed-to type, not zero elements. */
+   p_surf = CALLOC(1, sizeof(*p_surf));
+   if (!p_surf)
+      return VDP_STATUS_RESOURCES;
+
+   p_surf->psurface = CALLOC(1, sizeof(struct pipe_surface));
+   if (!p_surf->psurface) {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_surf;
+   }
+
+   dev = vlGetDataHTAB(device);
+   if (!dev) {
+      ret = VDP_STATUS_INVALID_HANDLE;
+      goto no_handle;
+   }
+
+   if (!dev->vlscreen)
+      dev->vlscreen = vl_screen_create(dev->display, dev->screen);
+   if (!dev->vlscreen) {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_handle;
+   }
+
+   p_surf->psurface->height = height;
+   p_surf->psurface->width = width;
+   p_surf->psurface->level = 0;
+   p_surf->psurface->usage = PIPE_USAGE_DEFAULT;
+   /* chroma_type is a VdpChromaType, so use the chroma (not YCbCr) mapping. */
+   p_surf->chroma_format = TypeToPipe(chroma_type);
+   p_surf->vlscreen = dev->vlscreen;
+
+   *surface = vlAddDataHTAB(p_surf);
+   if (*surface == 0) {
+      ret = VDP_STATUS_ERROR;
+      goto no_handle;
+   }
+
+   return VDP_STATUS_OK;
+
+   /* vlDestroyHTAB() is deliberately not called on failure (XXX: probably right). */
+no_handle:
+   FREE(p_surf->psurface);
+no_surf:
+   FREE(p_surf);
+   return ret;
+}
+
+/* Release the surface behind `surface`; an unknown handle is VDP_STATUS_INVALID_HANDLE. */
+VdpStatus
+vlVdpVideoSurfaceDestroy(VdpVideoSurface surface)
+{
+   vlVdpSurface *victim = (vlVdpSurface *)vlGetDataHTAB((vlHandle)surface);
+
+   if (!victim)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   /* NOTE(review): nothing here removes the handle from the table — confirm intended. */
+   if (victim->psurface)
+      victim->vlscreen->pscreen->tex_surface_destroy(victim->psurface);
+   FREE(victim);
+   return VDP_STATUS_OK;
+}
+
+/*
+ * Report the chroma type and dimensions recorded for a video surface.
+ * Returns VDP_STATUS_INVALID_POINTER when an output pointer is NULL,
+ * VDP_STATUS_RESOURCES when the handle table cannot be created and
+ * VDP_STATUS_INVALID_HANDLE when the surface is unknown or incomplete.
+ */
+VdpStatus
+vlVdpVideoSurfaceGetParameters(VdpVideoSurface surface,
+                               VdpChromaType *chroma_type,
+                               uint32_t *width,
+                               uint32_t *height)
+{
+   vlVdpSurface *surf;
+
+   if (!width || !height || !chroma_type)
+      return VDP_STATUS_INVALID_POINTER;
+   if (!vlCreateHTAB())
+      return VDP_STATUS_RESOURCES;
+
+   surf = vlGetDataHTAB(surface);
+   /* NOTE(review): a chroma_format of 0 is rejected below — confirm no valid format is 0. */
+   if (!surf || !surf->psurface || !surf->chroma_format)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   *width = surf->psurface->width;
+   *height = surf->psurface->height;
+   *chroma_type = PipeToType(surf->chroma_format);
+   return VDP_STATUS_OK;
+}
+
+/* Not implemented yet: validate the output arrays, then report that to the caller. */
+VdpStatus
+vlVdpVideoSurfaceGetBitsYCbCr(VdpVideoSurface surface,
+                              VdpYCbCrFormat destination_ycbcr_format,
+                              void *const *destination_data, uint32_t const *destination_pitches)
+{
+   if (!(destination_data && destination_pitches))
+      return VDP_STATUS_INVALID_POINTER;
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
-- 
cgit v1.2.3


From 3299997bcc5a672617095adb560b3834dced39a6 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@io.dk>
Date: Wed, 14 Jul 2010 00:34:56 +0200
Subject: vdpau changes

---
 configs/autoconf.in                                |   3 +
 configs/linux-dri                                  |   7 +-
 configs/linux-dri-x86-64                           |   2 +-
 configure.ac                                       |  25 +-
 src/gallium/state_trackers/vdpau/Makefile          |   9 +-
 src/gallium/state_trackers/vdpau/device.c          |   8 +-
 src/gallium/state_trackers/vdpau/ftab.c            |   2 +-
 src/gallium/state_trackers/vdpau/query.c           |  57 +-
 src/gallium/state_trackers/vdpau/vdpau_private.h   | 107 +++-
 src/gallium/targets/Makefile.vdpau                 |   9 +-
 src/gallium/tests/python/retrace/README            |  17 -
 src/gallium/tests/python/retrace/format.py         | 173 -----
 src/gallium/tests/python/retrace/model.py          | 213 -------
 src/gallium/tests/python/retrace/parse.py          | 392 ------------
 src/gallium/tests/python/retrace/parser.py         |  34 -
 src/gallium/tests/python/samples/gs.py             | 254 --------
 src/gallium/tests/python/samples/tri.py            | 233 -------
 src/gallium/tests/python/tests/.gitignore          |   3 -
 src/gallium/tests/python/tests/base.py             | 399 ------------
 .../tests/regress/fragment-shader/.gitignore       |   1 -
 .../tests/regress/fragment-shader/frag-abs.sh      |  13 -
 .../tests/regress/fragment-shader/frag-add.sh      |   8 -
 .../tests/regress/fragment-shader/frag-cb-1d.sh    |  13 -
 .../tests/regress/fragment-shader/frag-dp3.sh      |   8 -
 .../tests/regress/fragment-shader/frag-dp4.sh      |   8 -
 .../tests/regress/fragment-shader/frag-dst.sh      |   8 -
 .../tests/regress/fragment-shader/frag-ex2.sh      |  11 -
 .../tests/regress/fragment-shader/frag-flr.sh      |  15 -
 .../tests/regress/fragment-shader/frag-frc.sh      |  13 -
 .../tests/regress/fragment-shader/frag-lg2.sh      |  15 -
 .../tests/regress/fragment-shader/frag-lit.sh      |   8 -
 .../tests/regress/fragment-shader/frag-lrp.sh      |  11 -
 .../tests/regress/fragment-shader/frag-mad.sh      |  11 -
 .../tests/regress/fragment-shader/frag-max.sh      |  10 -
 .../tests/regress/fragment-shader/frag-min.sh      |  10 -
 .../tests/regress/fragment-shader/frag-mov.sh      |   8 -
 .../tests/regress/fragment-shader/frag-mul.sh      |  10 -
 .../tests/regress/fragment-shader/frag-rcp.sh      |  15 -
 .../tests/regress/fragment-shader/frag-rsq.sh      |  15 -
 .../tests/regress/fragment-shader/frag-sge.sh      |  13 -
 .../tests/regress/fragment-shader/frag-slt.sh      |  13 -
 .../regress/fragment-shader/frag-srcmod-abs.sh     |  13 -
 .../regress/fragment-shader/frag-srcmod-absneg.sh  |  15 -
 .../regress/fragment-shader/frag-srcmod-neg.sh     |  11 -
 .../regress/fragment-shader/frag-srcmod-swz.sh     |   8 -
 .../tests/regress/fragment-shader/frag-sub.sh      |   8 -
 .../tests/regress/fragment-shader/frag-xpd.sh      |   8 -
 .../regress/fragment-shader/fragment-shader.py     | 257 --------
 .../python/tests/regress/vertex-shader/.gitignore  |   1 -
 .../tests/regress/vertex-shader/vertex-shader.py   | 287 ---------
 src/gallium/tests/python/tests/texture_render.py   | 320 ----------
 src/gallium/tests/python/tests/tree.py             |  23 -
 src/gallium/tests/trivial/.gitignore               |   3 -
 src/gallium/tests/trivial/Makefile                 |  44 --
 src/gallium/tests/unit/Makefile                    |  47 --
 src/gallium/tests/unit/SConscript                  |  25 -
 src/gallium/tests/unit/pipe_barrier_test.c         |  86 ---
 src/gallium/tests/unit/u_cache_test.c              | 121 ----
 src/gallium/tests/unit/u_format_test.c             | 708 ---------------------
 src/gallium/tests/unit/u_half_test.c               |  32 -
 60 files changed, 208 insertions(+), 3993 deletions(-)
 delete mode 100644 src/gallium/tests/python/retrace/README
 delete mode 100755 src/gallium/tests/python/retrace/format.py
 delete mode 100755 src/gallium/tests/python/retrace/model.py
 delete mode 100755 src/gallium/tests/python/retrace/parse.py
 delete mode 100755 src/gallium/tests/python/retrace/parser.py
 delete mode 100644 src/gallium/tests/python/samples/gs.py
 delete mode 100644 src/gallium/tests/python/samples/tri.py
 delete mode 100644 src/gallium/tests/python/tests/.gitignore
 delete mode 100755 src/gallium/tests/python/tests/base.py
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/.gitignore
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-abs.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-add.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-1d.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-dp3.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-dp4.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-dst.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-ex2.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-flr.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-frc.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-lg2.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-lit.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-lrp.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-mad.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-max.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-min.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-mov.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-mul.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-rcp.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-rsq.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-sge.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-slt.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-abs.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-absneg.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-neg.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-swz.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-sub.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/frag-xpd.sh
 delete mode 100644 src/gallium/tests/python/tests/regress/fragment-shader/fragment-shader.py
 delete mode 100644 src/gallium/tests/python/tests/regress/vertex-shader/.gitignore
 delete mode 100644 src/gallium/tests/python/tests/regress/vertex-shader/vertex-shader.py
 delete mode 100755 src/gallium/tests/python/tests/texture_render.py
 delete mode 100755 src/gallium/tests/python/tests/tree.py
 delete mode 100644 src/gallium/tests/trivial/.gitignore
 delete mode 100644 src/gallium/tests/trivial/Makefile
 delete mode 100644 src/gallium/tests/unit/Makefile
 delete mode 100644 src/gallium/tests/unit/SConscript
 delete mode 100644 src/gallium/tests/unit/pipe_barrier_test.c
 delete mode 100644 src/gallium/tests/unit/u_cache_test.c
 delete mode 100644 src/gallium/tests/unit/u_format_test.c
 delete mode 100644 src/gallium/tests/unit/u_half_test.c

diff --git a/configs/autoconf.in b/configs/autoconf.in
index 3ef385a8a66..9abf1618024 100644
--- a/configs/autoconf.in
+++ b/configs/autoconf.in
@@ -138,6 +138,9 @@ DRI_DRIVER_SEARCH_DIR = @DRI_DRIVER_SEARCH_DIR@
 # EGL driver install directory
 EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
 
+# VDPAU library install directory
+VDPAU_LIB_INSTALL_DIR=@VDPAU_LIB_INSTALL_DIR@
+
 # Xorg driver install directory (for xorg state-tracker)
 XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
 
diff --git a/configs/linux-dri b/configs/linux-dri
index 49e35790463..eca321bc6ac 100644
--- a/configs/linux-dri
+++ b/configs/linux-dri
@@ -58,12 +58,11 @@ PROGRAM_DIRS := egl/eglut egl/opengl $(PROGRAM_DIRS)
 EGL_DRIVERS_DIRS = glx
 
 DRIVER_DIRS = dri
-GALLIUM_WINSYS_DIRS = sw sw/xlib drm/vmware drm/intel drm/i965
+GALLIUM_WINSYS_DIRS = sw sw/xlib
 GALLIUM_TARGET_DIRS = egl-swrast
-GALLIUM_STATE_TRACKERS_DIRS = egl
+GALLIUM_STATE_TRACKERS_DIRS = egl vdpau
 
-DRI_DIRS = i810 i915 i965 mach64 mga r128 r200 r300 radeon \
-	savage sis tdfx unichrome swrast
+DRI_DIRS = r300 radeon swrast
 
 INTEL_LIBS = `pkg-config --libs libdrm_intel`
 INTEL_CFLAGS = `pkg-config --cflags libdrm_intel`
diff --git a/configs/linux-dri-x86-64 b/configs/linux-dri-x86-64
index 656cf6140d7..90e6c215adb 100644
--- a/configs/linux-dri-x86-64
+++ b/configs/linux-dri-x86-64
@@ -20,5 +20,5 @@ EXTRA_LIB_PATH=-L/usr/X11R6/lib64
 # the new interface.  i810 are missing because there is no x86-64
 # system where they could *ever* be used.
 #
-DRI_DIRS = i915 i965 mach64 mga r128 r200 r300 radeon savage tdfx unichrome
+DRI_DIRS = swrast
 
diff --git a/configure.ac b/configure.ac
index 757bc1e8e78..c5b2d670456 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1255,13 +1255,20 @@ yes)
             # mesa/es is required to build es state tracker
             CORE_DIRS="$CORE_DIRS mesa/es"
             ;;
-        xorg/xvmc)
-            # Check for libXvMC?
+	xorg/xvmc)
+            # Check for xvmc?
             if test "x$enable_gallium_g3dvl" != xyes; then
                 AC_MSG_ERROR([cannot build XvMC state tracker without --enable-gallium-g3dvl])
             fi
             HAVE_ST_XVMC="yes"
             ;;
+        vdpau)
+            # Check for libvdpau?
+            if test "x$enable_gallium_g3dvl" != xyes; then
+                AC_MSG_ERROR([cannot build vdpau state tracker without --enable-gallium-g3dvl])
+            fi
+            HAVE_ST_VDPAU="yes"
+            ;;
         esac
     done
     GALLIUM_STATE_TRACKERS_DIRS="$state_trackers"
@@ -1365,7 +1372,7 @@ dnl
 dnl Gallium helper functions
 dnl
 gallium_check_st() {
-    if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_EGL" = xyes || test "x$HAVE_ST_XORG" = xyes || test "x$HAVE_ST_XVMC" = xyes; then
+    if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_EGL" = xyes || test "x$HAVE_ST_XORG" = xyes || test "x$HAVE_ST_XVMC" = xyes || test "x$HAVE_ST_VDPAU" = xyes; then
          GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS $1"
     fi
     if test "x$HAVE_ST_DRI" = xyes && test "x$2" != x; then
@@ -1380,6 +1387,9 @@ gallium_check_st() {
     if test "x$HAVE_ST_XVMC" = xyes && test "x$5" != x; then
          GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $5"
     fi
+    if test "x$HAVE_ST_VDPAU" = xyes && test "x$6" != x; then
+         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $6"
+    fi
 }
 
 
@@ -1454,13 +1464,20 @@ AC_ARG_ENABLE([gallium-g3dvl],
 if test "x$enable_gallium_g3dvl" = xyes; then
     case "$mesa_driver" in
     xlib)
-        GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS xvmc-softpipe"
+        GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS vdpau-softpipe"
         ;;
     dri)
         GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS g3dvl/dri"
         ;;
     esac
 fi
+dnl Directory for VDPAU libs
+AC_ARG_WITH([vdpau-libdir],
+    [AS_HELP_STRING([--with-vdpau-libdir=DIR],
+        [directory for the VDPAU libraries @<:@default=${libdir}/vdpau@:>@])],
+    [VDPAU_LIB_INSTALL_DIR="$withval"],
+    [VDPAU_LIB_INSTALL_DIR='${libdir}/vdpau'])
+AC_SUBST([VDPAU_LIB_INSTALL_DIR])
 
 dnl
 dnl Gallium swrast configuration
diff --git a/src/gallium/state_trackers/vdpau/Makefile b/src/gallium/state_trackers/vdpau/Makefile
index 346cce9d43b..53378a9c1ff 100644
--- a/src/gallium/state_trackers/vdpau/Makefile
+++ b/src/gallium/state_trackers/vdpau/Makefile
@@ -3,6 +3,10 @@ include $(TOP)/configs/current
 
 LIBNAME = vdpautracker
 
+VDPAU_MAJOR = 1
+VDPAU_MINOR = 0
+LIBRARY_DEFINES = -DVER_MAJOR=$(VDPAU_MAJOR) -DVER_MINOR=$(VDPAU_MINOR) $(STATE_TRACKER_DEFINES)
+
 LIBRARY_INCLUDES = \
 	$(shell pkg-config --cflags-only-I vdpau) \
 	-I$(TOP)/src/gallium/winsys/g3dvl
@@ -10,6 +14,9 @@ LIBRARY_INCLUDES = \
 C_SOURCES = htab.c \
 	    ftab.c \
 	    device.c \
-	    query.c
+	    query.c \
+	    surface.c
+
 
 include ../../Makefile.template
+
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 83fcaff0282..ba91e16a43f 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -37,7 +37,8 @@ PUBLIC VdpStatus
 vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGetProcAddress **get_proc_address)
 {
    VdpStatus    ret;
-   vlVdpDevice *dev;
+   vlVdpDevice *dev = NULL;
+   struct vl_screen *vlscreen = NULL;
 
    if (!(display && device && get_proc_address))
       return VDP_STATUS_INVALID_POINTER;
@@ -47,11 +48,14 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGe
       goto no_htab;
    }
 
-   dev = CALLOC(1, sizeof(vlVdpDevice));
+   dev = CALLOC(1, sizeof(vlVdpDevice)); /* one zeroed device; the fields below are written into it */
    if (!dev) {
       ret = VDP_STATUS_RESOURCES;
       goto no_dev;
    }
+   dev->display = display;
+   dev->screen = screen;
+
 
    *device = vlAddDataHTAB(dev);
    if (*device == 0) {
diff --git a/src/gallium/state_trackers/vdpau/ftab.c b/src/gallium/state_trackers/vdpau/ftab.c
index a8a29857df7..7e476e5ee28 100644
--- a/src/gallium/state_trackers/vdpau/ftab.c
+++ b/src/gallium/state_trackers/vdpau/ftab.c
@@ -39,7 +39,7 @@ static void* ftab[67] =
    0, /* VDP_FUNC_ID_GENERATE_CSC_MATRIX */
    &vlVdpVideoSurfaceQueryCapabilities, /* VDP_FUNC_ID_VIDEO_SURFACE_QUERY_CAPABILITIES */
    &vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities, /* VDP_FUNC_ID_VIDEO_SURFACE_QUERY_GET_PUT_BITS_Y_CB_CR_CAPABILITIES */
-   0, /* VDP_FUNC_ID_VIDEO_SURFACE_CREATE */
+   &vlVdpVideoSurfaceCreate, /* VDP_FUNC_ID_VIDEO_SURFACE_CREATE */
    0, /* VDP_FUNC_ID_VIDEO_SURFACE_DESTROY */
    0, /* VDP_FUNC_ID_VIDEO_SURFACE_GET_PARAMETERS */
    0, /* VDP_FUNC_ID_VIDEO_SURFACE_GET_BITS_Y_CB_CR */
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
index 57bd7fb7526..71793cc8ad5 100644
--- a/src/gallium/state_trackers/vdpau/query.c
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -26,6 +26,11 @@
  **************************************************************************/
 
 #include "vdpau_private.h"
+#include <vl_winsys.h>
+#include <assert.h>
+#include <pipe/p_screen.h>
+#include <math.h>
+
 
 VdpStatus
 vlVdpGetApiVersion(uint32_t *api_version)
@@ -43,7 +48,7 @@ vlVdpGetInformationString(char const **information_string)
    if (!information_string)
       return VDP_STATUS_INVALID_POINTER;
 
-   *information_string = "VDPAU-G3DVL";
+   *information_string = INFORMATION_STRING;
    return VDP_STATUS_OK;
 }
 
@@ -51,10 +56,40 @@ VdpStatus
 vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chroma_type,
                                    VdpBool *is_supported, uint32_t *max_width, uint32_t *max_height)
 {
+   uint32_t max_2d_texture_level;
+   vlVdpDevice *dev;
+
    if (!(is_supported && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
 
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   dev = vlGetDataHTAB(device);
+   if (!dev)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   /* The screen is created lazily, the first time a query needs it. */
+   if (!dev->vlscreen)
+      dev->vlscreen = vl_screen_create(dev->display, dev->screen);
+   if (!dev->vlscreen)
+      return VDP_STATUS_RESOURCES;
+
+   /* XXX: Current limits: only 4:2:0 surfaces.  An unsupported chroma type
+    * is still a successful query: the answer is "no" with zeroed limits,
+    * rather than an uninitialized status code. */
+   *max_width = *max_height = 0;
+   *is_supported = (surface_chroma_type == VDP_CHROMA_TYPE_420);
+   if (!*is_supported)
+      return VDP_STATUS_OK;
+
+   max_2d_texture_level = dev->vlscreen->pscreen->get_param(dev->vlscreen->pscreen,
+                                                            PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+   if (!max_2d_texture_level)
+      return VDP_STATUS_RESOURCES;
+
+   /* I am not quite sure if it is max_2d_texture_level-1 or just max_2d_texture_level */
+   /* An integer shift yields the exact power of two without going through pow(). */
+   *max_width = *max_height = 1u << (max_2d_texture_level - 1);
+
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
@@ -65,7 +100,23 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   vlVdpDevice *dev = vlGetDataHTAB(device);
+   if (!dev)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   if (!dev->vlscreen)
+      dev->vlscreen = vl_screen_create(dev->display, dev->screen);
+   if (!dev->vlscreen)
+      return VDP_STATUS_RESOURCES;
+
+   /* Y8U8V8A8 is never passed to the screen; report it as unsupported instead of leaving *is_supported unset. */
+   *is_supported = false;
+   if (bits_ycbcr_format != VDP_YCBCR_FORMAT_Y8U8V8A8)
+      *is_supported = dev->vlscreen->pscreen->is_format_supported(dev->vlscreen->pscreen,
+         FormatToPipe(bits_ycbcr_format), PIPE_TEXTURE_2D,
+         PIPE_BIND_RENDER_TARGET, PIPE_TEXTURE_GEOM_NON_SQUARE);
+
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 8f54ae657ce..27793892185 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -25,14 +25,112 @@
  *
  **************************************************************************/
 
+#ifndef VDPAU_PRIVATE_H
+#define VDPAU_PRIVATE_H
+
+
 #include <vdpau/vdpau.h>
 #include <pipe/p_compiler.h>
+#include <vl_winsys.h>
+#include <assert.h>
+
+#define INFORMATION G3DVL VDPAU Driver Shared Library version VER_MAJOR.VER_MINOR
+#define QUOTEME(x) #x
+#define TOSTRING(x) QUOTEME(x)
+#define INFORMATION_STRING TOSTRING(INFORMATION)
+#define VL_HANDLES
+
+/* Translate a VDPAU chroma type into the matching pipe chroma format. */
+static enum pipe_video_chroma_format TypeToPipe(VdpChromaType vdpau_type)
+{
+   if (vdpau_type == VDP_CHROMA_TYPE_420)
+      return PIPE_VIDEO_CHROMA_FORMAT_420;
+   if (vdpau_type == VDP_CHROMA_TYPE_422)
+      return PIPE_VIDEO_CHROMA_FORMAT_422;
+   if (vdpau_type == VDP_CHROMA_TYPE_444)
+      return PIPE_VIDEO_CHROMA_FORMAT_444;
+
+   /* Unknown type: assert(0) fires when assertions are enabled, else -1 is returned. */
+   assert(0);
+
+   return -1;
+}
+
+/* Translate a pipe chroma format back into the matching VDPAU chroma type. */
+static VdpChromaType PipeToType(enum pipe_video_chroma_format pipe_type)
+{
+   if (pipe_type == PIPE_VIDEO_CHROMA_FORMAT_420)
+      return VDP_CHROMA_TYPE_420;
+   if (pipe_type == PIPE_VIDEO_CHROMA_FORMAT_422)
+      return VDP_CHROMA_TYPE_422;
+   if (pipe_type == PIPE_VIDEO_CHROMA_FORMAT_444)
+      return VDP_CHROMA_TYPE_444;
+
+   /* Unknown format: assert(0) fires when assertions are enabled, else -1 is returned. */
+   assert(0);
+
+   return -1;
+}
+
+/* Translate a VDPAU YCbCr format into the matching pipe format. */
+static enum pipe_format FormatToPipe(VdpYCbCrFormat vdpau_format)
+{
+   if (vdpau_format == VDP_YCBCR_FORMAT_NV12)
+      return PIPE_FORMAT_NV12;
+   if (vdpau_format == VDP_YCBCR_FORMAT_YV12)
+      return PIPE_FORMAT_YV12;
+   if (vdpau_format == VDP_YCBCR_FORMAT_UYVY)
+      return PIPE_FORMAT_UYVY;
+   if (vdpau_format == VDP_YCBCR_FORMAT_YUYV)
+      return PIPE_FORMAT_YUYV;
+   if (vdpau_format == VDP_YCBCR_FORMAT_Y8U8V8A8)
+      return 0; /* Not defined in p_format.h */
+   if (vdpau_format == VDP_YCBCR_FORMAT_V8U8Y8A8)
+      return PIPE_FORMAT_VUYA;
+
+   /* Unknown format: assert(0) fires when assertions are enabled, else -1 is returned. */
+   assert(0);
+
+   return -1;
+}
+
+/* Translate a pipe format back into the matching VDPAU YCbCr format. */
+static VdpYCbCrFormat PipeToFormat(enum pipe_format p_format)
+{
+   if (p_format == PIPE_FORMAT_NV12)
+      return VDP_YCBCR_FORMAT_NV12;
+   if (p_format == PIPE_FORMAT_YV12)
+      return VDP_YCBCR_FORMAT_YV12;
+   if (p_format == PIPE_FORMAT_UYVY)
+      return VDP_YCBCR_FORMAT_UYVY;
+   if (p_format == PIPE_FORMAT_YUYV)
+      return VDP_YCBCR_FORMAT_YUYV;
+   /* VDP_YCBCR_FORMAT_Y8U8V8A8 has no branch here: its PIPE_FORMAT_YUVA
+    * mapping is disabled. */
+   if (p_format == PIPE_FORMAT_VUYA)
+      return VDP_YCBCR_FORMAT_V8U8Y8A8;
+
+   /* Unknown format: assert(0) fires when assertions are enabled, else -1 is returned. */
+   assert(0);
+
+   return -1;
+}
 
 typedef struct
 {
-   int dummy;
+   void *display;               /* display handed to vdp_imp_device_create_x11() */
+   int screen;                  /* screen number handed over together with the display */
+   struct vl_screen *vlscreen;  /* created on first use through vl_screen_create() */
+   struct vl_context *vctx;     /* NOTE(review): not assigned by any code in view — confirm owner */
 } vlVdpDevice;
 
+typedef struct
+{
+   struct vl_screen *vlscreen;     /* screen of the device the surface was created on */
+   struct pipe_surface *psurface;  /* carries the width and height given at creation */
+   enum pipe_video_chroma_format chroma_format; /* pipe-side chroma format of the surface */
+} vlVdpSurface;
+
 typedef uint32_t vlHandle;
 
 boolean vlCreateHTAB(void);
@@ -57,3 +155,10 @@ VdpVideoMixerQueryParameterSupport vlVdpVideoMixerQueryParameterSupport;
 VdpVideoMixerQueryParameterValueRange vlVdpVideoMixerQueryParameterValueRange;
 VdpVideoMixerQueryAttributeSupport vlVdpVideoMixerQueryAttributeSupport;
 VdpVideoMixerQueryAttributeValueRange vlVdpVideoMixerQueryAttributeValueRange;
+VdpVideoSurfaceCreate vlVdpVideoSurfaceCreate;
+VdpVideoSurfaceDestroy vlVdpVideoSurfaceDestroy;
+VdpVideoSurfaceGetParameters vlVdpVideoSurfaceGetParameters;
+VdpVideoSurfaceGetBitsYCbCr vlVdpVideoSurfaceGetBitsYCbCr;
+VdpVideoSurfacePutBitsYCbCr vlVdpVideoSurfacePutBitsYCbCr;
+
+#endif // VDPAU_PRIVATE_H
\ No newline at end of file
diff --git a/src/gallium/targets/Makefile.vdpau b/src/gallium/targets/Makefile.vdpau
index e5c3dad7dad..2accbeb702e 100644
--- a/src/gallium/targets/Makefile.vdpau
+++ b/src/gallium/targets/Makefile.vdpau
@@ -2,6 +2,7 @@
 
 LIBBASENAME = vdpau_g3dvl
 LIBNAME = lib$(LIBBASENAME).so
+VDPAU_LIB_GLOB=lib$(LIBBASENAME).*so*
 VDPAU_MAJOR = 1
 VDPAU_MINOR = 0
 INCLUDES = -I$(TOP)/src/gallium/include \
@@ -9,7 +10,7 @@ INCLUDES = -I$(TOP)/src/gallium/include \
 	   -I$(TOP)/src/gallium/auxiliary \
 	   -I$(TOP)/src/gallium/winsys/g3dvl \
 	   $(DRIVER_INCLUDES)
-DEFINES = -DGALLIUM_TRACE $(DRIVER_DEFINES)
+DEFINES = -DGALLIUM_TRACE -DVER_MAJOR=$(VDPAU_MAJOR) -DVER_MINOR=$(VDPAU_MINOR) $(DRIVER_DEFINES)
 LIBS = $(EXTRA_LIB_PATH) $(DRIVER_LIBS) -lvdpau -lXext -lX11 -lm
 STATE_TRACKER_LIB = $(TOP)/src/gallium/state_trackers/vdpau/libvdpautracker.a
 
@@ -54,8 +55,8 @@ clean:
 	-rm -f *.o *~ *.so $(SYMLINKS)
 	-rm -f depend depend.bak
 
-#install: $(LIBNAME)
-#	$(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
-#	$(MINSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+install: default
+	$(INSTALL) -d $(DESTDIR)$(VDPAU_LIB_INSTALL_DIR)
+	$(MINSTALL) -m 755 $(TOP)/$(LIB_DIR)/gallium/$(VDPAU_LIB_GLOB) $(DESTDIR)$(VDPAU_LIB_INSTALL_DIR)
 
 include depend
diff --git a/src/gallium/tests/python/retrace/README b/src/gallium/tests/python/retrace/README
deleted file mode 100644
index 822cd114044..00000000000
--- a/src/gallium/tests/python/retrace/README
+++ /dev/null
@@ -1,17 +0,0 @@
-This is an application written in python to replay the traces captured by the
- trace pipe driver. 
-
-
-To use it follow the instructions in src/gallium/drivers/trace/README and
-src/gallium/state_trackers/python/README, and then do
-
-  python src/gallium/state_trackers/python/samples/retrace/interpreter.py filename.trace
-
-
-This is still work in progress:
-- not everything is captured/replayed
-  - surface/textures contents
-- any tiny error will result in a crash
-
---
-Jose Fonseca <jrfonseca@tungstengraphics.com>
diff --git a/src/gallium/tests/python/retrace/format.py b/src/gallium/tests/python/retrace/format.py
deleted file mode 100755
index a4285bfe075..00000000000
--- a/src/gallium/tests/python/retrace/format.py
+++ /dev/null
@@ -1,173 +0,0 @@
-#!/usr/bin/env python
-##########################################################################
-#
-# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
-# All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sub license, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
-# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-##########################################################################
-
-
-import sys
-
-
-class Formatter:
-    '''Plain formatter'''
-
-    def __init__(self, stream):
-        self.stream = stream
-
-    def text(self, text):
-        self.stream.write(text)
-
-    def newline(self):
-        self.text('\n')
-
-    def function(self, name):
-        self.text(name)
-
-    def variable(self, name):
-        self.text(name)
-
-    def literal(self, value):
-        self.text(str(value))
-
-    def address(self, addr):
-        self.text(str(addr))
-
-
-class AnsiFormatter(Formatter):
-    '''Formatter for plain-text files which outputs ANSI escape codes. See
-    http://en.wikipedia.org/wiki/ANSI_escape_code for more information
-    concerning ANSI escape codes.
-    '''
-
-    _csi = '\33['
-
-    _normal = '0m'
-    _bold = '1m'
-    _italic = '3m'
-    _red = '31m'
-    _green = '32m'
-    _blue = '34m'
-
-    def _escape(self, code):
-        self.text(self._csi + code)
-
-    def function(self, name):
-        self._escape(self._bold)
-        Formatter.function(self, name)
-        self._escape(self._normal)
-
-    def variable(self, name):
-        self._escape(self._italic)
-        Formatter.variable(self, name)
-        self._escape(self._normal)
-
-    def literal(self, value):
-        self._escape(self._blue)
-        Formatter.literal(self, value)
-        self._escape(self._normal)
-
-    def address(self, value):
-        self._escape(self._green)
-        Formatter.address(self, value)
-        self._escape(self._normal)
-
-
-class WindowsConsoleFormatter(Formatter):
-    '''Formatter for the Windows Console. See 
-    http://code.activestate.com/recipes/496901/ for more information.
-    '''
-
-    STD_INPUT_HANDLE  = -10
-    STD_OUTPUT_HANDLE = -11
-    STD_ERROR_HANDLE  = -12
-
-    FOREGROUND_BLUE      = 0x01
-    FOREGROUND_GREEN     = 0x02
-    FOREGROUND_RED       = 0x04
-    FOREGROUND_INTENSITY = 0x08
-    BACKGROUND_BLUE      = 0x10
-    BACKGROUND_GREEN     = 0x20
-    BACKGROUND_RED       = 0x40
-    BACKGROUND_INTENSITY = 0x80
-
-    _normal = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED
-    _bold = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY
-    _italic = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED
-    _red = FOREGROUND_RED | FOREGROUND_INTENSITY
-    _green = FOREGROUND_GREEN | FOREGROUND_INTENSITY
-    _blue = FOREGROUND_BLUE | FOREGROUND_INTENSITY
-
-    def __init__(self, stream):
-        Formatter.__init__(self, stream)
-
-        if stream is sys.stdin:
-            nStdHandle = self.STD_INPUT_HANDLE
-        elif stream is sys.stdout:
-            nStdHandle = self.STD_OUTPUT_HANDLE
-        elif stream is sys.stderr:
-            nStdHandle = self.STD_ERROR_HANDLE
-        else:
-            nStdHandle = None
-
-        if nStdHandle:
-            import ctypes
-            self.handle = ctypes.windll.kernel32.GetStdHandle(nStdHandle)
-        else:
-            self.handle = None
-
-    def _attribute(self, attr):
-        if self.handle:
-            import ctypes
-            ctypes.windll.kernel32.SetConsoleTextAttribute(self.handle, attr)
-
-    def function(self, name):
-        self._attribute(self._bold)
-        Formatter.function(self, name)
-        self._attribute(self._normal)
-
-    def variable(self, name):
-        self._attribute(self._italic)
-        Formatter.variable(self, name)
-        self._attribute(self._normal)
-
-    def literal(self, value):
-        self._attribute(self._blue)
-        Formatter.literal(self, value)
-        self._attribute(self._normal)
-
-    def address(self, value):
-        self._attribute(self._green)
-        Formatter.address(self, value)
-        self._attribute(self._normal)
-
-
-def DefaultFormatter(stream):
-    if sys.platform in ('linux2', 'cygwin'):
-        return AnsiFormatter(stream)
-    elif sys.platform in ('win32',):
-        return WindowsConsoleFormatter(stream)
-    else:
-        return Formatter(stream)
-
diff --git a/src/gallium/tests/python/retrace/model.py b/src/gallium/tests/python/retrace/model.py
deleted file mode 100755
index d4a079fb1e5..00000000000
--- a/src/gallium/tests/python/retrace/model.py
+++ /dev/null
@@ -1,213 +0,0 @@
-#!/usr/bin/env python
-##########################################################################
-# 
-# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
-# All Rights Reserved.
-# 
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sub license, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-# 
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial portions
-# of the Software.
-# 
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
-# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-# 
-##########################################################################
-
-
-'''Trace data model.'''
-
-
-import sys
-import string
-import format
-
-try:
-    from cStringIO import StringIO
-except ImportError:
-    from StringIO import StringIO
-
-
-class Node:
-    
-    def visit(self, visitor):
-        raise NotImplementedError
-
-    def __str__(self):
-        stream = StringIO()
-        formatter = format.DefaultFormatter(stream)
-        pretty_printer = PrettyPrinter(formatter)
-        self.visit(pretty_printer)
-        return stream.getvalue()
-
-
-class Literal(Node):
-    
-    def __init__(self, value):
-        self.value = value
-
-    def visit(self, visitor):
-        visitor.visit_literal(self)
-
-
-class NamedConstant(Node):
-    
-    def __init__(self, name):
-        self.name = name
-
-    def visit(self, visitor):
-        visitor.visit_named_constant(self)
-    
-
-class Array(Node):
-    
-    def __init__(self, elements):
-        self.elements = elements
-
-    def visit(self, visitor):
-        visitor.visit_array(self)
-
-
-class Struct(Node):
-    
-    def __init__(self, name, members):
-        self.name = name
-        self.members = members        
-
-    def visit(self, visitor):
-        visitor.visit_struct(self)
-
-        
-class Pointer(Node):
-    
-    def __init__(self, address):
-        self.address = address
-
-    def visit(self, visitor):
-        visitor.visit_pointer(self)
-
-
-class Call:
-    
-    def __init__(self, no, klass, method, args, ret):
-        self.no = no
-        self.klass = klass
-        self.method = method
-        self.args = args
-        self.ret = ret
-        
-    def visit(self, visitor):
-        visitor.visit_call(self)
-
-
-class Trace:
-    
-    def __init__(self, calls):
-        self.calls = calls
-        
-    def visit(self, visitor):
-        visitor.visit_trace(self)
-    
-    
-class Visitor:
-    
-    def visit_literal(self, node):
-        raise NotImplementedError
-    
-    def visit_named_constant(self, node):
-        raise NotImplementedError
-    
-    def visit_array(self, node):
-        raise NotImplementedError
-    
-    def visit_struct(self, node):
-        raise NotImplementedError
-    
-    def visit_pointer(self, node):
-        raise NotImplementedError
-    
-    def visit_call(self, node):
-        raise NotImplementedError
-    
-    def visit_trace(self, node):
-        raise NotImplementedError
-
-
-class PrettyPrinter:
-
-    def __init__(self, formatter):
-        self.formatter = formatter
-    
-    def visit_literal(self, node):
-        if isinstance(node.value, basestring):
-            if len(node.value) >= 4096 or node.value.strip(string.printable):
-                self.formatter.text('...')
-                return
-
-            self.formatter.literal('"' + node.value + '"')
-            return
-
-        self.formatter.literal(repr(node.value))
-    
-    def visit_named_constant(self, node):
-        self.formatter.literal(node.name)
-    
-    def visit_array(self, node):
-        self.formatter.text('{')
-        sep = ''
-        for value in node.elements:
-            self.formatter.text(sep)
-            value.visit(self) 
-            sep = ', '
-        self.formatter.text('}')
-    
-    def visit_struct(self, node):
-        self.formatter.text('{')
-        sep = ''
-        for name, value in node.members:
-            self.formatter.text(sep)
-            self.formatter.variable(name)
-            self.formatter.text(' = ')
-            value.visit(self) 
-            sep = ', '
-        self.formatter.text('}')
-    
-    def visit_pointer(self, node):
-        self.formatter.address(node.address)
-    
-    def visit_call(self, node):
-        self.formatter.text('%s ' % node.no)
-        if node.klass is not None:
-            self.formatter.function(node.klass + '::' + node.method)
-        else:
-            self.formatter.function(node.method)
-        self.formatter.text('(')
-        sep = ''
-        for name, value in node.args:
-            self.formatter.text(sep)
-            self.formatter.variable(name)
-            self.formatter.text(' = ')
-            value.visit(self) 
-            sep = ', '
-        self.formatter.text(')')
-        if node.ret is not None:
-            self.formatter.text(' = ')
-            node.ret.visit(self)
-    
-    def visit_trace(self, node):
-        for call in node.calls:
-            call.visit(self)
-            self.formatter.newline()
-
diff --git a/src/gallium/tests/python/retrace/parse.py b/src/gallium/tests/python/retrace/parse.py
deleted file mode 100755
index b08d3686715..00000000000
--- a/src/gallium/tests/python/retrace/parse.py
+++ /dev/null
@@ -1,392 +0,0 @@
-#!/usr/bin/env python
-##########################################################################
-# 
-# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
-# All Rights Reserved.
-# 
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sub license, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-# 
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial portions
-# of the Software.
-# 
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
-# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-# 
-##########################################################################
-
-
-import sys
-import xml.parsers.expat
-import binascii
-import optparse
-
-from model import *
-
-
-ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF = range(4)
-
-
-class XmlToken:
-
-    def __init__(self, type, name_or_data, attrs = None, line = None, column = None):
-        assert type in (ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF)
-        self.type = type
-        self.name_or_data = name_or_data
-        self.attrs = attrs
-        self.line = line
-        self.column = column
-
-    def __str__(self):
-        if self.type == ELEMENT_START:
-            return '<' + self.name_or_data + ' ...>'
-        if self.type == ELEMENT_END:
-            return '</' + self.name_or_data + '>'
-        if self.type == CHARACTER_DATA:
-            return self.name_or_data
-        if self.type == EOF:
-            return 'end of file'
-        assert 0
-
-
-class XmlTokenizer:
-    """Expat based XML tokenizer."""
-
-    def __init__(self, fp, skip_ws = True):
-        self.fp = fp
-        self.tokens = []
-        self.index = 0
-        self.final = False
-        self.skip_ws = skip_ws
-        
-        self.character_pos = 0, 0
-        self.character_data = ''
-        
-        self.parser = xml.parsers.expat.ParserCreate()
-        self.parser.StartElementHandler  = self.handle_element_start
-        self.parser.EndElementHandler    = self.handle_element_end
-        self.parser.CharacterDataHandler = self.handle_character_data
-    
-    def handle_element_start(self, name, attributes):
-        self.finish_character_data()
-        line, column = self.pos()
-        token = XmlToken(ELEMENT_START, name, attributes, line, column)
-        self.tokens.append(token)
-    
-    def handle_element_end(self, name):
-        self.finish_character_data()
-        line, column = self.pos()
-        token = XmlToken(ELEMENT_END, name, None, line, column)
-        self.tokens.append(token)
-
-    def handle_character_data(self, data):
-        if not self.character_data:
-            self.character_pos = self.pos()
-        self.character_data += data
-    
-    def finish_character_data(self):
-        if self.character_data:
-            if not self.skip_ws or not self.character_data.isspace(): 
-                line, column = self.character_pos
-                token = XmlToken(CHARACTER_DATA, self.character_data, None, line, column)
-                self.tokens.append(token)
-            self.character_data = ''
-    
-    def next(self):
-        size = 16*1024
-        while self.index >= len(self.tokens) and not self.final:
-            self.tokens = []
-            self.index = 0
-            data = self.fp.read(size)
-            self.final = len(data) < size
-            data = data.rstrip('\0')
-            try:
-                self.parser.Parse(data, self.final)
-            except xml.parsers.expat.ExpatError, e:
-                #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS:
-                if e.code == 3:
-                    pass
-                else:
-                    raise e
-        if self.index >= len(self.tokens):
-            line, column = self.pos()
-            token = XmlToken(EOF, None, None, line, column)
-        else:
-            token = self.tokens[self.index]
-            self.index += 1
-        return token
-
-    def pos(self):
-        return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber
-
-
-class TokenMismatch(Exception):
-
-    def __init__(self, expected, found):
-        self.expected = expected
-        self.found = found
-
-    def __str__(self):
-        return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found))
-
-
-
-class XmlParser:
-    """Base XML document parser."""
-
-    def __init__(self, fp):
-        self.tokenizer = XmlTokenizer(fp)
-        self.consume()
-    
-    def consume(self):
-        self.token = self.tokenizer.next()
-
-    def match_element_start(self, name):
-        return self.token.type == ELEMENT_START and self.token.name_or_data == name
-    
-    def match_element_end(self, name):
-        return self.token.type == ELEMENT_END and self.token.name_or_data == name
-
-    def element_start(self, name):
-        while self.token.type == CHARACTER_DATA:
-            self.consume()
-        if self.token.type != ELEMENT_START:
-            raise TokenMismatch(XmlToken(ELEMENT_START, name), self.token)
-        if self.token.name_or_data != name:
-            raise TokenMismatch(XmlToken(ELEMENT_START, name), self.token)
-        attrs = self.token.attrs
-        self.consume()
-        return attrs
-    
-    def element_end(self, name):
-        while self.token.type == CHARACTER_DATA:
-            self.consume()
-        if self.token.type != ELEMENT_END:
-            raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token)
-        if self.token.name_or_data != name:
-            raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token)
-        self.consume()
-
-    def character_data(self, strip = True):
-        data = ''
-        while self.token.type == CHARACTER_DATA:
-            data += self.token.name_or_data
-            self.consume()
-        if strip:
-            data = data.strip()
-        return data
-
-
-class TraceParser(XmlParser):
-
-    def __init__(self, fp):
-        XmlParser.__init__(self, fp)
-        self.last_call_no = 0
-    
-    def parse(self):
-        self.element_start('trace')
-        while self.token.type not in (ELEMENT_END, EOF):
-            call = self.parse_call()
-            self.handle_call(call)
-        if self.token.type != EOF:
-            self.element_end('trace')
-
-    def parse_call(self):
-        attrs = self.element_start('call')
-        try:
-            no = int(attrs['no'])
-        except KeyError:
-            self.last_call_no += 1
-            no = self.last_call_no
-        else:
-            self.last_call_no = no
-        klass = attrs['class']
-        method = attrs['method']
-        args = []
-        ret = None
-        while self.token.type == ELEMENT_START:
-            if self.token.name_or_data == 'arg':
-                arg = self.parse_arg()
-                args.append(arg)
-            elif self.token.name_or_data == 'ret':
-                ret = self.parse_ret()
-            elif self.token.name_or_data == 'call':
-                # ignore nested function calls
-                self.parse_call()
-            else:
-                raise TokenMismatch("<arg ...> or <ret ...>", self.token)
-        self.element_end('call')
-        
-        return Call(no, klass, method, args, ret)
-
-    def parse_arg(self):
-        attrs = self.element_start('arg')
-        name = attrs['name']
-        value = self.parse_value()
-        self.element_end('arg')
-
-        return name, value
-
-    def parse_ret(self):
-        attrs = self.element_start('ret')
-        value = self.parse_value()
-        self.element_end('ret')
-
-        return value
-
-    def parse_value(self):
-        expected_tokens = ('null', 'bool', 'int', 'uint', 'float', 'string', 'enum', 'array', 'struct', 'ptr', 'bytes')
-        if self.token.type == ELEMENT_START:
-            if self.token.name_or_data in expected_tokens:
-                method = getattr(self, 'parse_' +  self.token.name_or_data)
-                return method()
-        raise TokenMismatch(" or " .join(expected_tokens), self.token)
-
-    def parse_null(self):
-        self.element_start('null')
-        self.element_end('null')
-        return Literal(None)
-        
-    def parse_bool(self):
-        self.element_start('bool')
-        value = int(self.character_data())
-        self.element_end('bool')
-        return Literal(value)
-        
-    def parse_int(self):
-        self.element_start('int')
-        value = int(self.character_data())
-        self.element_end('int')
-        return Literal(value)
-        
-    def parse_uint(self):
-        self.element_start('uint')
-        value = int(self.character_data())
-        self.element_end('uint')
-        return Literal(value)
-        
-    def parse_float(self):
-        self.element_start('float')
-        value = float(self.character_data())
-        self.element_end('float')
-        return Literal(value)
-        
-    def parse_enum(self):
-        self.element_start('enum')
-        name = self.character_data()
-        self.element_end('enum')
-        return NamedConstant(name)
-        
-    def parse_string(self):
-        self.element_start('string')
-        value = self.character_data()
-        self.element_end('string')
-        return Literal(value)
-        
-    def parse_bytes(self):
-        self.element_start('bytes')
-        value = binascii.a2b_hex(self.character_data())
-        self.element_end('bytes')
-        return Literal(value)
-        
-    def parse_array(self):
-        self.element_start('array')
-        elems = []
-        while self.token.type != ELEMENT_END:
-            elems.append(self.parse_elem())
-        self.element_end('array')
-        return Array(elems)
-
-    def parse_elem(self):
-        self.element_start('elem')
-        value = self.parse_value()
-        self.element_end('elem')
-        return value
-
-    def parse_struct(self):
-        attrs = self.element_start('struct')
-        name = attrs['name']
-        members = []
-        while self.token.type != ELEMENT_END:
-            members.append(self.parse_member())
-        self.element_end('struct')
-        return Struct(name, members)
-
-    def parse_member(self):
-        attrs = self.element_start('member')
-        name = attrs['name']
-        value = self.parse_value()
-        self.element_end('member')
-
-        return name, value
-
-    def parse_ptr(self):
-        self.element_start('ptr')
-        address = self.character_data()
-        self.element_end('ptr')
-
-        return Pointer(address)
-
-    def handle_call(self, call):
-        pass
-    
-    
-class TraceDumper(TraceParser):
-    
-    def __init__(self, fp):
-        TraceParser.__init__(self, fp)
-        self.formatter = format.DefaultFormatter(sys.stdout)
-        self.pretty_printer = PrettyPrinter(self.formatter)
-
-    def handle_call(self, call):
-        call.visit(self.pretty_printer)
-        self.formatter.newline()
-        
-
-class Main:
-    '''Common main class for all retrace command line utilities.''' 
-
-    def __init__(self):
-        pass
-
-    def main(self):
-        optparser = self.get_optparser()
-        (options, args) = optparser.parse_args(sys.argv[1:])
-    
-        if args:
-            for arg in args:
-                if arg.endswith('.gz'):
-                    from gzip import GzipFile
-                    stream = GzipFile(arg, 'rt')
-                elif arg.endswith('.bz2'):
-                    from bz2 import BZ2File
-                    stream = BZ2File(arg, 'rU')
-                else:
-                    stream = open(arg, 'rt')
-                self.process_arg(stream, options)
-        else:
-            self.process_arg(stream, options)
-
-    def get_optparser(self):
-        optparser = optparse.OptionParser(
-            usage="\n\t%prog [options] [traces] ...")
-        return optparser
-
-    def process_arg(self, stream, options):
-        parser = TraceDumper(stream)
-        parser.parse()
-
-
-if __name__ == '__main__':
-    Main().main()
diff --git a/src/gallium/tests/python/retrace/parser.py b/src/gallium/tests/python/retrace/parser.py
deleted file mode 100755
index bd47c9a6b06..00000000000
--- a/src/gallium/tests/python/retrace/parser.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python
-##########################################################################
-# 
-# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
-# All Rights Reserved.
-# 
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sub license, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-# 
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial portions
-# of the Software.
-# 
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
-# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-# 
-##########################################################################
-
-
-from parse import *
-
-
-if __name__ == '__main__':
-    Main().main()
diff --git a/src/gallium/tests/python/samples/gs.py b/src/gallium/tests/python/samples/gs.py
deleted file mode 100644
index 936c0b3f33a..00000000000
--- a/src/gallium/tests/python/samples/gs.py
+++ /dev/null
@@ -1,254 +0,0 @@
-#!/usr/bin/env python
-##########################################################################
-#
-# Copyright 2009 VMware
-# All Rights Reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sub license, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial portions
-# of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
-# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-##########################################################################
-
-
-from gallium import *
-
-
-def make_image(surface):
-    data = surface.get_tile_rgba8(0, 0, surface.width, surface.height)
-
-    import Image
-    outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1)
-    return outimage
-
-def save_image(filename, surface):
-    outimage = make_image(surface)
-    outimage.save(filename, "PNG")
-
-def show_image(surface):
-    outimage = make_image(surface)
-
-    import Tkinter as tk
-    from PIL import Image, ImageTk
-    root = tk.Tk()
-
-    root.title('background image')
-
-    image1 = ImageTk.PhotoImage(outimage)
-    w = image1.width()
-    h = image1.height()
-    x = 100
-    y = 100
-    root.geometry("%dx%d+%d+%d" % (w, h, x, y))
-    panel1 = tk.Label(root, image=image1)
-    panel1.pack(side='top', fill='both', expand='yes')
-    panel1.image = image1
-    root.mainloop()
-
-
-def test(dev):
-    ctx = dev.context_create()
-
-    width = 255
-    height = 255
-    minz = 0.0
-    maxz = 1.0
-
-    # disabled blending/masking
-    blend = Blend()
-    blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE
-    blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE
-    blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO
-    blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO
-    blend.rt[0].colormask = PIPE_MASK_RGBA
-    ctx.set_blend(blend)
-
-    # depth/stencil/alpha
-    depth_stencil_alpha = DepthStencilAlpha()
-    depth_stencil_alpha.depth.enabled = 1
-    depth_stencil_alpha.depth.writemask = 1
-    depth_stencil_alpha.depth.func = PIPE_FUNC_LESS
-    ctx.set_depth_stencil_alpha(depth_stencil_alpha)
-
-    # rasterizer
-    rasterizer = Rasterizer()
-    rasterizer.front_winding = PIPE_WINDING_CW
-    rasterizer.cull_mode = PIPE_WINDING_NONE
-    rasterizer.scissor = 1
-    ctx.set_rasterizer(rasterizer)
-
-    # viewport
-    viewport = Viewport()
-    scale = FloatArray(4)
-    scale[0] = width / 2.0
-    scale[1] = -height / 2.0
-    scale[2] = (maxz - minz) / 2.0
-    scale[3] = 1.0
-    viewport.scale = scale
-    translate = FloatArray(4)
-    translate[0] = width / 2.0
-    translate[1] = height / 2.0
-    translate[2] = (maxz - minz) / 2.0
-    translate[3] = 0.0
-    viewport.translate = translate
-    ctx.set_viewport(viewport)
-
-    # samplers
-    sampler = Sampler()
-    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE
-    sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST
-    sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST
-    sampler.normalized_coords = 1
-    ctx.set_sampler(0, sampler)
-
-    # scissor
-    scissor = Scissor()
-    scissor.minx = 0
-    scissor.miny = 0
-    scissor.maxx = width
-    scissor.maxy = height
-    ctx.set_scissor(scissor)
-
-    clip = Clip()
-    clip.nr = 0
-    ctx.set_clip(clip)
-
-    # framebuffer
-    cbuf = dev.resource_create(
-        PIPE_FORMAT_B8G8R8X8_UNORM,
-        width, height,
-        bind=PIPE_BIND_RENDER_TARGET,
-    ).get_surface()
-    zbuf = dev.resource_create(
-        PIPE_FORMAT_Z32_UNORM,
-        width, height,
-        bind=PIPE_BIND_DEPTH_STENCIL,
-    ).get_surface()
-    fb = Framebuffer()
-    fb.width = width
-    fb.height = height
-    fb.nr_cbufs = 1
-    fb.set_cbuf(0, cbuf)
-    fb.set_zsbuf(zbuf)
-    ctx.set_framebuffer(fb)
-    rgba = FloatArray(4);
-    rgba[0] = 0.0
-    rgba[1] = 0.0
-    rgba[2] = 0.0
-    rgba[3] = 0.0
-    ctx.clear(PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, rgba, 1.0, 0xff)
-
-    # vertex shader
-    vs = Shader('''
-        VERT
-        DCL IN[0], POSITION, CONSTANT
-        DCL IN[1], COLOR, CONSTANT
-        DCL OUT[0], POSITION, CONSTANT
-        DCL OUT[1], COLOR, CONSTANT
-        0:MOV OUT[0], IN[0]
-        1:MOV OUT[1], IN[1]
-        2:END
-    ''')
-    ctx.set_vertex_shader(vs)
-
-    gs = Shader('''
-        GEOM
-        PROPERTY GS_INPUT_PRIMITIVE TRIANGLES
-        PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP
-        DCL IN[][0], POSITION, CONSTANT
-        DCL IN[][1], COLOR, CONSTANT
-        DCL OUT[0], POSITION, CONSTANT
-        DCL OUT[1], COLOR, CONSTANT
-        0:MOV OUT[0], IN[0][0]
-        1:MOV OUT[1], IN[0][1]
-        2:EMIT
-        3:MOV OUT[0], IN[1][0]
-        4:MOV OUT[1], IN[1][1]
-        5:EMIT
-        6:MOV OUT[0], IN[2][0]
-        7:MOV OUT[1], IN[2][1]
-        8:EMIT
-        9:ENDPRIM
-        10:END
-    ''')
-    ctx.set_geometry_shader(gs)
-
-    # fragment shader
-    fs = Shader('''
-        FRAG
-        DCL IN[0], COLOR, LINEAR
-        DCL OUT[0], COLOR, CONSTANT
-        0:MOV OUT[0], IN[0]
-        1:END
-    ''')
-    ctx.set_fragment_shader(fs)
-
-    nverts = 3
-    nattrs = 2
-    verts = FloatArray(nverts * nattrs * 4)
-
-    verts[ 0] =   0.0 # x1
-    verts[ 1] =   0.8 # y1
-    verts[ 2] =   0.2 # z1
-    verts[ 3] =   1.0 # w1
-    verts[ 4] =   1.0 # r1
-    verts[ 5] =   0.0 # g1
-    verts[ 6] =   0.0 # b1
-    verts[ 7] =   1.0 # a1
-    verts[ 8] =  -0.8 # x2
-    verts[ 9] =  -0.8 # y2
-    verts[10] =   0.5 # z2
-    verts[11] =   1.0 # w2
-    verts[12] =   0.0 # r2
-    verts[13] =   1.0 # g2
-    verts[14] =   0.0 # b2
-    verts[15] =   1.0 # a2
-    verts[16] =   0.8 # x3
-    verts[17] =  -0.8 # y3
-    verts[18] =   0.8 # z3
-    verts[19] =   1.0 # w3
-    verts[20] =   0.0 # r3
-    verts[21] =   0.0 # g3
-    verts[22] =   1.0 # b3
-    verts[23] =   1.0 # a3
-
-    ctx.draw_vertices(PIPE_PRIM_TRIANGLES,
-                      nverts,
-                      nattrs,
-                      verts)
-
-    ctx.flush()
-
-    show_image(cbuf)
-    #show_image(zbuf)
-    #save_image('cbuf.png', cbuf)
-    #save_image('zbuf.png', zbuf)
-
-
-
-def main():
-    dev = Device()
-    test(dev)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/src/gallium/tests/python/samples/tri.py b/src/gallium/tests/python/samples/tri.py
deleted file mode 100644
index fed929d4200..00000000000
--- a/src/gallium/tests/python/samples/tri.py
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/usr/bin/env python
-##########################################################################
-# 
-# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
-# All Rights Reserved.
-# 
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sub license, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-# 
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial portions
-# of the Software.
-# 
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
-# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-# 
-##########################################################################
-
-
-from gallium import *
-
-
-def make_image(ctx, surface):
-    data = ctx.surface_read_rgba8(surface, 0, 0, surface.width, surface.height)
-
-    import Image
-    outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1)
-    return outimage
-
-def save_image(ctx, surface, filename):
-    outimage = make_image(ctx, surface)
-    outimage.save(filename, "PNG")
-
-def show_image(ctx, surface):
-    outimage = make_image(ctx, surface)
-    
-    import Tkinter as tk
-    from PIL import Image, ImageTk
-    root = tk.Tk()
-    
-    root.title('background image')
-    
-    image1 = ImageTk.PhotoImage(outimage)
-    w = image1.width()
-    h = image1.height()
-    x = 100
-    y = 100
-    root.geometry("%dx%d+%d+%d" % (w, h, x, y))
-    panel1 = tk.Label(root, image=image1)
-    panel1.pack(side='top', fill='both', expand='yes')
-    panel1.image = image1
-    root.mainloop()
-
-
-def test(dev):
-    ctx = dev.context_create()
-
-    width = 255
-    height = 255
-    minz = 0.0
-    maxz = 1.0
-
-    # disabled blending/masking
-    blend = Blend()
-    blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE
-    blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE
-    blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO
-    blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO
-    blend.rt[0].colormask = PIPE_MASK_RGBA
-    ctx.set_blend(blend)
-
-    # depth/stencil/alpha
-    depth_stencil_alpha = DepthStencilAlpha()
-    depth_stencil_alpha.depth.enabled = 1
-    depth_stencil_alpha.depth.writemask = 1
-    depth_stencil_alpha.depth.func = PIPE_FUNC_LESS
-    ctx.set_depth_stencil_alpha(depth_stencil_alpha)
-
-    # rasterizer
-    rasterizer = Rasterizer()
-    rasterizer.front_winding = PIPE_WINDING_CW
-    rasterizer.cull_mode = PIPE_WINDING_NONE
-    rasterizer.scissor = 1
-    ctx.set_rasterizer(rasterizer)
-
-    # viewport
-    viewport = Viewport()
-    scale = FloatArray(4)
-    scale[0] = width / 2.0
-    scale[1] = -height / 2.0
-    scale[2] = (maxz - minz) / 2.0
-    scale[3] = 1.0
-    viewport.scale = scale
-    translate = FloatArray(4)
-    translate[0] = width / 2.0
-    translate[1] = height / 2.0
-    translate[2] = (maxz - minz) / 2.0
-    translate[3] = 0.0
-    viewport.translate = translate
-    ctx.set_viewport(viewport)
-
-    # samplers
-    sampler = Sampler()
-    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE
-    sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST
-    sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST
-    sampler.normalized_coords = 1
-    ctx.set_fragment_sampler(0, sampler)
-
-    # scissor
-    scissor = Scissor()
-    scissor.minx = 0
-    scissor.miny = 0
-    scissor.maxx = width
-    scissor.maxy = height
-    ctx.set_scissor(scissor)
-
-    # clip
-    clip = Clip()
-    clip.nr = 0
-    ctx.set_clip(clip)
-
-    # framebuffer
-    cbuf = dev.resource_create(
-        PIPE_FORMAT_B8G8R8X8_UNORM, 
-        width, height,
-        bind=PIPE_BIND_RENDER_TARGET,
-    ).get_surface()
-    zbuf = dev.resource_create(
-        PIPE_FORMAT_Z32_UNORM,
-        width, height,
-        bind=PIPE_BIND_DEPTH_STENCIL,
-    ).get_surface()
-    fb = Framebuffer()
-    fb.width = width
-    fb.height = height
-    fb.nr_cbufs = 1
-    fb.set_cbuf(0, cbuf)
-    fb.set_zsbuf(zbuf)
-    ctx.set_framebuffer(fb)
-    rgba = FloatArray(4);
-    rgba[0] = 0.0
-    rgba[1] = 0.0
-    rgba[2] = 0.0
-    rgba[3] = 0.0
-    ctx.clear(PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, rgba, 1.0, 0xff)
-    
-    # vertex shader
-    vs = Shader('''
-        VERT
-        DCL IN[0], POSITION, CONSTANT
-        DCL IN[1], COLOR, CONSTANT
-        DCL OUT[0], POSITION, CONSTANT
-        DCL OUT[1], COLOR, CONSTANT
-        0:MOV OUT[0], IN[0]
-        1:MOV OUT[1], IN[1]
-        2:END
-    ''')
-    ctx.set_vertex_shader(vs)
-
-    # fragment shader
-    fs = Shader('''
-        FRAG
-        DCL IN[0], COLOR, LINEAR
-        DCL OUT[0], COLOR, CONSTANT
-        0:MOV OUT[0], IN[0]
-        1:END
-    ''')
-    ctx.set_fragment_shader(fs)
-
-    nverts = 3
-    nattrs = 2
-    verts = FloatArray(nverts * nattrs * 4)
-
-    verts[ 0] =   0.0 # x1
-    verts[ 1] =   0.8 # y1
-    verts[ 2] =   0.2 # z1
-    verts[ 3] =   1.0 # w1
-    verts[ 4] =   1.0 # r1
-    verts[ 5] =   0.0 # g1
-    verts[ 6] =   0.0 # b1
-    verts[ 7] =   1.0 # a1
-    verts[ 8] =  -0.8 # x2
-    verts[ 9] =  -0.8 # y2
-    verts[10] =   0.5 # z2
-    verts[11] =   1.0 # w2
-    verts[12] =   0.0 # r2
-    verts[13] =   1.0 # g2
-    verts[14] =   0.0 # b2
-    verts[15] =   1.0 # a2
-    verts[16] =   0.8 # x3
-    verts[17] =  -0.8 # y3
-    verts[18] =   0.8 # z3
-    verts[19] =   1.0 # w3
-    verts[20] =   0.0 # r3
-    verts[21] =   0.0 # g3
-    verts[22] =   1.0 # b3
-    verts[23] =   1.0 # a3
-
-    ctx.draw_vertices(PIPE_PRIM_TRIANGLES,
-                      nverts, 
-                      nattrs, 
-                      verts)
-
-    ctx.flush()
-    
-    show_image(ctx, cbuf)
-    show_image(ctx, zbuf)
-    save_image(ctx, cbuf, 'cbuf.png')
-    save_image(ctx, zbuf, 'zbuf.png')
-
-
-
-def main():
-    dev = Device()
-    test(dev)
-
-
-if __name__ == '__main__':
-    main()
diff --git a/src/gallium/tests/python/tests/.gitignore b/src/gallium/tests/python/tests/.gitignore
deleted file mode 100644
index 0dbbaeea16b..00000000000
--- a/src/gallium/tests/python/tests/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.txt
-*.tsv
-*.dot
diff --git a/src/gallium/tests/python/tests/base.py b/src/gallium/tests/python/tests/base.py
deleted file mode 100755
index d8cf84db363..00000000000
--- a/src/gallium/tests/python/tests/base.py
+++ /dev/null
@@ -1,399 +0,0 @@
-#!/usr/bin/env python
-##########################################################################
-# 
-# Copyright 2009 VMware, Inc.
-# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
-# All Rights Reserved.
-# 
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sub license, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-# 
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial portions
-# of the Software.
-# 
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
-# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-# 
-##########################################################################
-
-
-"""Base classes for tests.
-
-Loosely inspired on Python's unittest module.
-"""
-
-
-import os.path
-import sys
-
-from gallium import *
-
-
-# Enumerate all pixel formats
-formats = {}
-for name, value in globals().items():
-    if name.startswith("PIPE_FORMAT_") and isinstance(value, int) and name not in ("PIPE_FORMAT_NONE", "PIPE_FORMAT_COUNT"):
-        formats[value] = name
-
-def make_image(width, height, rgba):
-    import Image
-    outimage = Image.new(
-        mode='RGB',
-        size=(width, height),
-        color=(0,0,0))
-    outpixels = outimage.load()
-    for y in range(0, height):
-        for x in range(0, width):
-            offset = (y*width + x)*4
-            r, g, b, a = [int(min(max(rgba[offset + ch], 0.0), 1.0)*255) for ch in range(4)]
-            outpixels[x, y] = r, g, b
-    return outimage
-
-def save_image(width, height, rgba, filename):
-    outimage = make_image(width, height, rgba)
-    outimage.save(filename, "PNG")
-
-def show_image(width, height, **rgbas):
-    import Tkinter as tk
-    from PIL import Image, ImageTk
-
-    root = tk.Tk()
-    
-    x = 64
-    y = 64
-    
-    labels = rgbas.keys()
-    labels.sort() 
-    for i in range(len(labels)):
-        label = labels[i]
-        outimage = make_image(width, height, rgbas[label])
-        
-        if i:
-            window = tk.Toplevel(root)
-        else:
-            window = root    
-        window.title(label)
-        image1 = ImageTk.PhotoImage(outimage)
-        w = image1.width()
-        h = image1.height()
-        window.geometry("%dx%d+%d+%d" % (w, h, x, y))
-        panel1 = tk.Label(window, image=image1)
-        panel1.pack(side='top', fill='both', expand='yes')
-        panel1.image = image1
-        x += w + 2
-    
-    root.mainloop()
-
-
-class TestFailure(Exception):
-
-    pass
-
-class TestSkip(Exception):
-    
-    pass
-
-
-class Test:
-
-    def __init__(self):
-        pass
-
-    def _run(self, result):
-        raise NotImplementedError
-    
-    def run(self):
-        result = TestResult()
-        self._run(result)
-        result.report()
-
-    def assert_rgba(self, ctx, surface, x, y, w, h, expected_rgba, pixel_tol=4.0/256, surface_tol=0.85):
-        total = h*w
-        different = ctx.surface_compare_rgba(surface, x, y, w, h, expected_rgba, tol=pixel_tol)
-        if different:
-            sys.stderr.write("%u out of %u pixels differ\n" % (different, total))
-
-        if float(total - different)/float(total) < surface_tol:
-            if 0:
-                rgba = FloatArray(h*w*4)
-                ctx.surface_read_rgba(surface, x, y, w, h, rgba)
-                show_image(w, h, Result=rgba, Expected=expected_rgba)
-                save_image(w, h, rgba, "result.png")
-                save_image(w, h, expected_rgba, "expected.png")
-            #sys.exit(0)
-            
-            raise TestFailure
-
-
-class TestCase(Test):
-    
-    tags = ()
-
-    def __init__(self, dev, **kargs):
-        Test.__init__(self)
-        self.dev = dev
-        self.__dict__.update(kargs)
-
-    def description(self):
-        descriptions = []
-        for tag in self.tags:
-            value = self.get(tag)
-            if value is not None and value != '':
-                descriptions.append(tag + '=' + str(value))
-        return ' '.join(descriptions)
-
-    def get(self, tag):
-        try:
-            method = getattr(self, '_get_' + tag)
-        except AttributeError:
-            return getattr(self, tag, None)
-        else:
-            return method()
-
-    def _get_target(self):
-        return {
-            PIPE_TEXTURE_1D: "1d", 
-            PIPE_TEXTURE_2D: "2d", 
-            PIPE_TEXTURE_3D: "3d", 
-            PIPE_TEXTURE_CUBE: "cube",
-        }[self.target]
-
-    def _get_format(self):
-        name = formats[self.format]
-        if name.startswith('PIPE_FORMAT_'):
-            name  = name[12:]
-        name = name.lower()
-        return name
-
-    def _get_face(self):
-        if self.target == PIPE_TEXTURE_CUBE:
-            return {
-                PIPE_TEX_FACE_POS_X: "+x",
-                PIPE_TEX_FACE_NEG_X: "-x",
-                PIPE_TEX_FACE_POS_Y: "+y",
-                PIPE_TEX_FACE_NEG_Y: "-y", 
-                PIPE_TEX_FACE_POS_Z: "+z", 
-                PIPE_TEX_FACE_NEG_Z: "-z",
-            }[self.face]
-        else:
-            return ''
-
-    def test(self):
-        raise NotImplementedError
-    
-    def _run(self, result):
-        result.test_start(self)
-        try:
-            self.test()
-        except KeyboardInterrupt:
-            raise
-        except TestSkip:
-            result.test_skipped(self)
-        except TestFailure:
-            result.test_failed(self)
-        else:
-            result.test_passed(self)
-
-
-class TestSuite(Test):
-    
-    def __init__(self, tests = None):
-        Test.__init__(self)
-        if tests is None:
-            self.tests = []
-        else:
-            self.tests = tests
-
-    def add_test(self, test):
-        self.tests.append(test) 
-    
-    def _run(self, result):
-        for test in self.tests:
-            test._run(result)
-
-
-class TestResult:
-    
-    def __init__(self):
-        self.tests = 0
-        self.passed = 0
-        self.skipped = 0
-        self.failed = 0
-
-        self.names = ['result']
-        self.types = ['pass skip fail']
-        self.rows = []
-    
-    def test_start(self, test):
-        sys.stdout.write("Running %s...\n" % test.description())
-        sys.stdout.flush()
-        self.tests += 1
-    
-    def test_passed(self, test):
-        sys.stdout.write("PASS\n")
-        sys.stdout.flush()
-        self.passed += 1
-        self.log_result(test, 'pass')
-            
-    def test_skipped(self, test):
-        sys.stdout.write("SKIP\n")
-        sys.stdout.flush()
-        self.skipped += 1
-        self.log_result(test, 'skip')
-        
-    def test_failed(self, test):
-        sys.stdout.write("FAIL\n")
-        sys.stdout.flush()
-        self.failed += 1
-        self.log_result(test, 'fail')
-
-    def log_result(self, test, result):
-        row = ['']*len(self.names)
-
-        # add result
-        assert self.names[0] == 'result'
-        assert result in ('pass', 'skip', 'fail')
-        row[0] = result
-
-        # add tags
-        for tag in test.tags:
-            value = test.get(tag)
-
-            # infer type
-            if value is None:
-                continue
-            elif isinstance(value, (int, float)):
-                value = str(value)
-                type = 'c' # continous
-            elif isinstance(value, basestring):
-                type = 'd' # discrete
-            else:
-                assert False
-                value = str(value)
-                type = 'd' # discrete
-
-            # insert value
-            try:
-                col = self.names.index(tag, 1)
-            except ValueError:
-                self.names.append(tag)
-                self.types.append(type)
-                row.append(value)
-            else:
-                row[col] = value
-                assert self.types[col] == type
-        
-        self.rows.append(row)
-
-    def report(self):
-        sys.stdout.write("%u tests, %u passed, %u skipped, %u failed\n\n" % (self.tests, self.passed, self.skipped, self.failed))
-        sys.stdout.flush()
-
-        name, ext = os.path.splitext(os.path.basename(sys.argv[0]))
-
-        tree = self.report_tree(name)
-        self.report_junit(name, stdout=tree)
-
-    def report_tree(self, name):
-        filename = name + '.tsv'
-        stream = file(filename, 'wt')
-
-        # header
-        stream.write('\t'.join(self.names) + '\n')
-        stream.write('\t'.join(self.types) + '\n')
-        stream.write('class\n')
-
-        # rows
-        for row in self.rows:
-            if row[0] == 'skip':
-                continue
-            row += ['']*(len(self.names) - len(row))
-            stream.write('\t'.join(row) + '\n')
-
-        stream.close()
-
-        # See http://www.ailab.si/orange/doc/ofb/c_otherclass.htm
-        try:
-            import orange
-            import orngTree
-        except ImportError:
-            sys.stderr.write('Install Orange from http://www.ailab.si/orange/ for a classification tree.\n')
-            return None
-
-        data = orange.ExampleTable(filename)
-
-        tree = orngTree.TreeLearner(data, sameMajorityPruning=1, mForPruning=2)
-
-        orngTree.printTxt(tree, maxDepth=4)
-
-        text_tree = orngTree.dumpTree(tree)
-
-        file(name + '.txt', 'wt').write(text_tree)
-
-        orngTree.printDot(tree, fileName=name+'.dot', nodeShape='ellipse', leafShape='box')
-
-        return text_tree
-    
-    def report_junit(self, name, stdout=None, stderr=None):
-        """Write test results in ANT's junit XML format, to use with Hudson CI.
-
-        See also:
-        - http://fisheye.hudson-ci.org/browse/Hudson/trunk/hudson/main/core/src/test/resources/hudson/tasks/junit
-        - http://www.junit.org/node/399
-        - http://wiki.apache.org/ant/Proposals/EnhancedTestReports
-        """
-
-        stream = file(name + '.xml', 'wt')
-
-        stream.write('<?xml version="1.0" encoding="UTF-8" ?>\n')
-        stream.write('<testsuite name="%s">\n' % self.escape_xml(name))
-        stream.write('  <properties>\n')
-        stream.write('  </properties>\n')
-
-        names = self.names[1:]
-
-        for row in self.rows:
-
-            test_name = ' '.join(['%s=%s' % pair for pair in zip(self.names[1:], row[1:])])
-
-            stream.write('  <testcase name="%s">\n' % (self.escape_xml(test_name)))
-
-            result = row[0]
-            if result == 'pass':
-                pass
-            elif result == 'skip':
-                stream.write('    <skipped/>\n')
-            else:
-                stream.write('    <failure/>\n')
-            
-            stream.write('  </testcase>\n')
-
-        if stdout:
-            stream.write('  <system-out>%s</system-out>\n' % self.escape_xml(stdout))
-        if stderr:
-            stream.write('  <system-err>%s</system-err>\n' % self.escape_xml(stderr))
-
-        stream.write('</testsuite>\n')
-
-        stream.close()
-
-    def escape_xml(self, s):
-        '''Escape a XML string.'''
-        s = s.replace('&', '&amp;')
-        s = s.replace('<', '&lt;')
-        s = s.replace('>', '&gt;')
-        s = s.replace('"', '&quot;')
-        s = s.replace("'", '&apos;')
-        return s
-
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/.gitignore b/src/gallium/tests/python/tests/regress/fragment-shader/.gitignore
deleted file mode 100644
index e33609d251c..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.png
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-abs.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-abs.sh
deleted file mode 100644
index 103d7497f48..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-abs.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-IMM FLT32 { -0.5, -0.4, -0.6, 0.0 }
-
-ADD TEMP[0], IN[0], IMM[0]
-ABS OUT[0], TEMP[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-add.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-add.sh
deleted file mode 100644
index bcb94205963..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-add.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-ADD OUT[0], IN[0], IN[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-1d.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-1d.sh
deleted file mode 100644
index 85fb9ea4e7f..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-cb-1d.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-DCL CONST[1]
-DCL CONST[3]
-DCL TEMP[0..1]
-
-ADD TEMP[0], IN[0], CONST[1]
-RCP TEMP[1], CONST[3].xxxx
-MUL OUT[0], TEMP[0], TEMP[1]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-dp3.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-dp3.sh
deleted file mode 100644
index b5281975d4a..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-dp3.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DP3 OUT[0], IN[0], IN[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-dp4.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-dp4.sh
deleted file mode 100644
index d59df76e70b..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-dp4.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DP4 OUT[0], IN[0].xyzx, IN[0].xyzx
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-dst.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-dst.sh
deleted file mode 100644
index fbb20fa9f62..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-dst.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DST OUT[0], IN[0], IN[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-ex2.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-ex2.sh
deleted file mode 100644
index b511288f4b6..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-ex2.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-EX2 TEMP[0], IN[0].xxxx
-MUL OUT[0], TEMP[0], IN[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-flr.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-flr.sh
deleted file mode 100644
index 99a2f96103a..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-flr.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-IMM FLT32 { 2.5, 4.0,  2.0, 1.0 }
-IMM FLT32 { 0.4, 0.25, 0.5, 1.0 }
-
-MUL TEMP[0], IN[0], IMM[0]
-FLR TEMP[0], TEMP[0]
-MUL OUT[0], TEMP[0], IMM[1]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-frc.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-frc.sh
deleted file mode 100644
index a54c2623b0a..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-frc.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-IMM FLT32 { 2.7, 3.1, 4.5, 1.0 }
-
-MUL TEMP[0], IN[0], IMM[0]
-FRC OUT[0], TEMP[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-lg2.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-lg2.sh
deleted file mode 100644
index 5f5b4be1092..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-lg2.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-IMM FLT32 { 1.0, 0.0, 0.0, 0.0 }
-IMM FLT32 { 0.5, 0.0, 0.0, 0.0 }
-
-ADD TEMP[0], IN[0], IMM[0]
-LG2 TEMP[0].x, TEMP[0].xxxx
-ADD OUT[0], TEMP[0], IMM[1]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-lit.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-lit.sh
deleted file mode 100644
index 6323c4712dc..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-lit.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-LIT OUT[0], IN[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-lrp.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-lrp.sh
deleted file mode 100644
index 740809d22e0..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-lrp.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-ABS TEMP[0], IN[0]
-LRP OUT[0], TEMP[0], IN[0].xxxx, IN[0].yyyy
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-mad.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-mad.sh
deleted file mode 100644
index 413b9dc3916..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-mad.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-IMM FLT32 { 0.5, 0.4, 0.6, 1.0 }
-IMM FLT32 { 0.5, 0.4, 0.6, 0.0 }
-
-MAD OUT[0], IN[0], IMM[0], IMM[1]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-max.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-max.sh
deleted file mode 100644
index b69f2132612..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-max.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-IMM FLT32 { 0.4, 0.4, 0.4, 0.0 }
-
-MAX OUT[0], IN[0], IMM[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-min.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-min.sh
deleted file mode 100644
index df284f49e71..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-min.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-IMM FLT32 { 0.6, 0.6, 0.6, 1.0 }
-
-MIN OUT[0], IN[0], IMM[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-mov.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-mov.sh
deleted file mode 100644
index 64af72f381b..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-mov.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-MOV OUT[0], IN[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-mul.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-mul.sh
deleted file mode 100644
index bdd0b0026b9..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-mul.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-IMM FLT32 { 0.5, 0.6, 0.7, 1.0 }
-
-MUL OUT[0], IN[0], IMM[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-rcp.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-rcp.sh
deleted file mode 100644
index f4b611b26ab..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-rcp.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-IMM FLT32 { 1.0, 0.0, 0.0, 0.0 }
-IMM FLT32 { 1.5, 0.0, 0.0, 0.0 }
-
-ADD TEMP[0], IN[0], IMM[0]
-RCP TEMP[0].x, TEMP[0].xxxx
-SUB OUT[0], TEMP[0], IMM[1]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-rsq.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-rsq.sh
deleted file mode 100644
index d1e9b0b53be..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-rsq.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-IMM FLT32 { 1.0, 0.0, 0.0, 0.0 }
-IMM FLT32 { 1.5, 0.0, 0.0, 0.0 }
-
-ADD TEMP[0], IN[0], IMM[0]
-RSQ TEMP[0].x, TEMP[0].xxxx
-SUB OUT[0], TEMP[0], IMM[1]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-sge.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-sge.sh
deleted file mode 100644
index 1f33fac4727..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-sge.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-IMM FLT32 { 0.6, 0.6, 0.6, 0.0 }
-
-SGE TEMP[0], IN[0], IMM[0]
-MUL OUT[0], IN[0], TEMP[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-slt.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-slt.sh
deleted file mode 100644
index d58b7886a12..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-slt.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-IMM FLT32 { 0.6, 0.6, 0.6, 0.0 }
-
-SLT TEMP[0], IN[0], IMM[0]
-MUL OUT[0], IN[0], TEMP[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-abs.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-abs.sh
deleted file mode 100644
index ecd19248c64..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-abs.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-IMM FLT32 { -0.3, -0.5, -0.4, 0.0 }
-
-ADD TEMP[0], IN[0], IMM[0]
-MOV OUT[0], |TEMP[0]|
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-absneg.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-absneg.sh
deleted file mode 100644
index c2d99ddd15b..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-absneg.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-IMM FLT32 { -0.2, -0.3, -0.4, 0.0 }
-IMM FLT32 { -1.0, -1.0, -1.0, -1.0 }
-
-ADD TEMP[0], IN[0], IMM[0]
-MOV TEMP[0], -|TEMP[0]|
-MUL OUT[0], TEMP[0], IMM[1]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-neg.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-neg.sh
deleted file mode 100644
index a08ab6d2dcb..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-neg.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-DCL TEMP[0]
-
-SUB TEMP[0], IN[0], IN[0].yzxw
-MOV OUT[0], -TEMP[0]
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-swz.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-swz.sh
deleted file mode 100644
index 6110647d979..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-srcmod-swz.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-MOV OUT[0], IN[0].yxzw
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-sub.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-sub.sh
deleted file mode 100644
index 673fca139aa..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-sub.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-SUB OUT[0], IN[0], IN[0].yzxw
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/frag-xpd.sh b/src/gallium/tests/python/tests/regress/fragment-shader/frag-xpd.sh
deleted file mode 100644
index 6ec8b1184cc..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/frag-xpd.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-FRAG
-
-DCL IN[0], COLOR, LINEAR
-DCL OUT[0], COLOR
-
-XPD OUT[0], IN[0], IN[0].yzxw
-
-END
diff --git a/src/gallium/tests/python/tests/regress/fragment-shader/fragment-shader.py b/src/gallium/tests/python/tests/regress/fragment-shader/fragment-shader.py
deleted file mode 100644
index ef65a9c5a1b..00000000000
--- a/src/gallium/tests/python/tests/regress/fragment-shader/fragment-shader.py
+++ /dev/null
@@ -1,257 +0,0 @@
-#!/usr/bin/env python
-##########################################################################
-# 
-# Copyright 2009 VMware, Inc.
-# All Rights Reserved.
-# 
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sub license, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-# 
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial portions
-# of the Software.
-# 
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
-# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-# 
-##########################################################################
-
-import struct
-
-from gallium import *
-
-def make_image(surface):
-    data = surface.get_tile_rgba8(0, 0, surface.width, surface.height)
-
-    import Image
-    outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1)
-    return outimage
-
-def save_image(filename, surface):
-    outimage = make_image(surface)
-    outimage.save(filename, "PNG")
-
-def test(dev, name):
-    ctx = dev.context_create()
-
-    width = 320
-    height = 320
-    minz = 0.0
-    maxz = 1.0
-
-    # disabled blending/masking
-    blend = Blend()
-    blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE
-    blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE
-    blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO
-    blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO
-    blend.rt[0].colormask = PIPE_MASK_RGBA
-    ctx.set_blend(blend)
-
-    # depth/stencil/alpha
-    depth_stencil_alpha = DepthStencilAlpha()
-    depth_stencil_alpha.depth.enabled = 0
-    depth_stencil_alpha.depth.writemask = 1
-    depth_stencil_alpha.depth.func = PIPE_FUNC_LESS
-    ctx.set_depth_stencil_alpha(depth_stencil_alpha)
-
-    # rasterizer
-    rasterizer = Rasterizer()
-    rasterizer.front_winding = PIPE_WINDING_CW
-    rasterizer.cull_mode = PIPE_WINDING_NONE
-    rasterizer.scissor = 1
-    ctx.set_rasterizer(rasterizer)
-
-    # viewport
-    viewport = Viewport()
-    scale = FloatArray(4)
-    scale[0] = width / 2.0
-    scale[1] = -height / 2.0
-    scale[2] = (maxz - minz) / 2.0
-    scale[3] = 1.0
-    viewport.scale = scale
-    translate = FloatArray(4)
-    translate[0] = width / 2.0
-    translate[1] = height / 2.0
-    translate[2] = (maxz - minz) / 2.0
-    translate[3] = 0.0
-    viewport.translate = translate
-    ctx.set_viewport(viewport)
-
-    # samplers
-    sampler = Sampler()
-    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE
-    sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST
-    sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST
-    sampler.normalized_coords = 1
-    ctx.set_fragment_sampler(0, sampler)
-
-    # scissor
-    scissor = Scissor()
-    scissor.minx = 0
-    scissor.miny = 0
-    scissor.maxx = width
-    scissor.maxy = height
-    ctx.set_scissor(scissor)
-
-    clip = Clip()
-    clip.nr = 0
-    ctx.set_clip(clip)
-
-    # framebuffer
-    cbuf = dev.resource_create(
-        PIPE_FORMAT_B8G8R8X8_UNORM,
-        width, height,
-        bind=PIPE_BIND_RENDER_TARGET,
-    ).get_surface()
-    fb = Framebuffer()
-    fb.width = width
-    fb.height = height
-    fb.nr_cbufs = 1
-    fb.set_cbuf(0, cbuf)
-    ctx.set_framebuffer(fb)
-    rgba = FloatArray(4);
-    rgba[0] = 0.5
-    rgba[1] = 0.5
-    rgba[2] = 0.5
-    rgba[3] = 0.5
-    ctx.clear(PIPE_CLEAR_COLOR, rgba, 0.0, 0)
-
-    # vertex shader
-    vs = Shader('''
-        VERT
-        DCL IN[0], POSITION
-        DCL IN[1], COLOR
-        DCL OUT[0], POSITION
-        DCL OUT[1], COLOR
-        MOV OUT[0], IN[0]
-        MOV OUT[1], IN[1]
-        END
-    ''')
-    ctx.set_vertex_shader(vs)
-
-    # fragment shader
-    fs = Shader(file('frag-' + name + '.sh', 'rt').read())
-    ctx.set_fragment_shader(fs)
-
-    constbuf0 = dev.buffer_create(64,
-                                  (PIPE_BUFFER_USAGE_CONSTANT |
-                                   PIPE_BUFFER_USAGE_GPU_READ |
-                                   PIPE_BUFFER_USAGE_CPU_WRITE),
-                                  4 * 4 * 4)
-
-    cbdata = ''
-    cbdata += struct.pack('4f', 0.4, 0.0, 0.0, 1.0)
-    cbdata += struct.pack('4f', 1.0, 1.0, 1.0, 1.0)
-    cbdata += struct.pack('4f', 2.0, 2.0, 2.0, 2.0)
-    cbdata += struct.pack('4f', 4.0, 8.0, 16.0, 32.0)
-
-    constbuf0.write(cbdata, 0)
-
-    ctx.set_constant_buffer(PIPE_SHADER_FRAGMENT,
-                            0,
-                            constbuf0)
-
-    constbuf1 = dev.buffer_create(64,
-                                  (PIPE_BUFFER_USAGE_CONSTANT |
-                                   PIPE_BUFFER_USAGE_GPU_READ |
-                                   PIPE_BUFFER_USAGE_CPU_WRITE),
-                                  4 * 4 * 4)
-
-    cbdata = ''
-    cbdata += struct.pack('4f', 0.1, 0.1, 0.1, 0.1)
-    cbdata += struct.pack('4f', 0.25, 0.25, 0.25, 0.25)
-    cbdata += struct.pack('4f', 0.5, 0.5, 0.5, 0.5)
-    cbdata += struct.pack('4f', 0.75, 0.75, 0.75, 0.75)
-
-    constbuf1.write(cbdata, 0)
-
-    ctx.set_constant_buffer(PIPE_SHADER_FRAGMENT,
-                            1,
-                            constbuf1)
-
-    xy = [
-        -0.8, -0.8,
-         0.8, -0.8,
-         0.0,  0.8,
-    ]
-    color = [
-        1.0, 0.0, 0.0,
-        0.0, 1.0, 0.0,
-        0.0, 0.0, 1.0,
-    ]
-
-    nverts = 3
-    nattrs = 2
-    verts = FloatArray(nverts * nattrs * 4)
-
-    for i in range(0, nverts):
-        verts[i * nattrs * 4 + 0] = xy[i * 2 + 0] # x
-        verts[i * nattrs * 4 + 1] = xy[i * 2 + 1] # y
-        verts[i * nattrs * 4 + 2] = 0.5 # z
-        verts[i * nattrs * 4 + 3] = 1.0 # w
-        verts[i * nattrs * 4 + 4] = color[i * 3 + 0] # r
-        verts[i * nattrs * 4 + 5] = color[i * 3 + 1] # g
-        verts[i * nattrs * 4 + 6] = color[i * 3 + 2] # b
-        verts[i * nattrs * 4 + 7] = 1.0 # a
-
-    ctx.draw_vertices(PIPE_PRIM_TRIANGLES,
-                      nverts,
-                      nattrs,
-                      verts)
-
-    ctx.flush()
-
-    save_image('frag-' + name + '.png', cbuf)
-
-def main():
-    tests = [
-        'abs',
-        'add',
-        'cb-1d',
-        'cb-2d',
-        'dp3',
-        'dp4',
-        'dst',
-        'ex2',
-        'flr',
-        'frc',
-        'lg2',
-        'lit',
-        'lrp',
-        'mad',
-        'max',
-        'min',
-        'mov',
-        'mul',
-        'rcp',
-        'rsq',
-        'sge',
-        'slt',
-        'srcmod-abs',
-        'srcmod-absneg',
-        'srcmod-neg',
-        'srcmod-swz',
-        'sub',
-        'xpd',
-    ]
-
-    dev = Device()
-    for t in tests:
-        test(dev, t)
-
-if __name__ == '__main__':
-    main()
diff --git a/src/gallium/tests/python/tests/regress/vertex-shader/.gitignore b/src/gallium/tests/python/tests/regress/vertex-shader/.gitignore
deleted file mode 100644
index e33609d251c..00000000000
--- a/src/gallium/tests/python/tests/regress/vertex-shader/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.png
diff --git a/src/gallium/tests/python/tests/regress/vertex-shader/vertex-shader.py b/src/gallium/tests/python/tests/regress/vertex-shader/vertex-shader.py
deleted file mode 100644
index 05e40dbd5f1..00000000000
--- a/src/gallium/tests/python/tests/regress/vertex-shader/vertex-shader.py
+++ /dev/null
@@ -1,287 +0,0 @@
-#!/usr/bin/env python
-##########################################################################
-# 
-# Copyright 2009 VMware, Inc.
-# All Rights Reserved.
-# 
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sub license, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-# 
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial portions
-# of the Software.
-# 
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
-# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-# 
-##########################################################################
-
-
-import struct
-
-from gallium import *
-
-def make_image(surface):
-    data = surface.get_tile_rgba8(0, 0, surface.width, surface.height)
-
-    import Image
-    outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1)
-    return outimage
-
-def save_image(filename, surface):
-    outimage = make_image(surface)
-    outimage.save(filename, "PNG")
-
-def test(dev, name):
-    ctx = dev.context_create()
-
-    width = 320
-    height = 320
-    minz = 0.0
-    maxz = 1.0
-
-    # disabled blending/masking
-    blend = Blend()
-    blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE
-    blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE
-    blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO
-    blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO
-    blend.rt[0].colormask = PIPE_MASK_RGBA
-    ctx.set_blend(blend)
-
-    # depth/stencil/alpha
-    depth_stencil_alpha = DepthStencilAlpha()
-    depth_stencil_alpha.depth.enabled = 0
-    depth_stencil_alpha.depth.writemask = 1
-    depth_stencil_alpha.depth.func = PIPE_FUNC_LESS
-    ctx.set_depth_stencil_alpha(depth_stencil_alpha)
-
-    # rasterizer
-    rasterizer = Rasterizer()
-    rasterizer.front_winding = PIPE_WINDING_CW
-    rasterizer.cull_mode = PIPE_WINDING_NONE
-    rasterizer.scissor = 1
-    ctx.set_rasterizer(rasterizer)
-
-    # viewport
-    viewport = Viewport()
-    scale = FloatArray(4)
-    scale[0] = width / 2.0
-    scale[1] = -height / 2.0
-    scale[2] = (maxz - minz) / 2.0
-    scale[3] = 1.0
-    viewport.scale = scale
-    translate = FloatArray(4)
-    translate[0] = width / 2.0
-    translate[1] = height / 2.0
-    translate[2] = (maxz - minz) / 2.0
-    translate[3] = 0.0
-    viewport.translate = translate
-    ctx.set_viewport(viewport)
-
-    # samplers
-    sampler = Sampler()
-    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE
-    sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST
-    sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST
-    sampler.normalized_coords = 1
-    ctx.set_fragment_sampler(0, sampler)
-
-    # scissor
-    scissor = Scissor()
-    scissor.minx = 0
-    scissor.miny = 0
-    scissor.maxx = width
-    scissor.maxy = height
-    ctx.set_scissor(scissor)
-
-    clip = Clip()
-    clip.nr = 0
-    ctx.set_clip(clip)
-
-    # framebuffer
-    cbuf = dev.resource_create(
-        PIPE_FORMAT_B8G8R8X8_UNORM,
-        width, height,
-        bind=PIPE_BIND_RENDER_TARGET,
-    ).get_surface()
-    fb = Framebuffer()
-    fb.width = width
-    fb.height = height
-    fb.nr_cbufs = 1
-    fb.set_cbuf(0, cbuf)
-    ctx.set_framebuffer(fb)
-    rgba = FloatArray(4);
-    rgba[0] = 0.5
-    rgba[1] = 0.5
-    rgba[2] = 0.5
-    rgba[3] = 0.5
-    ctx.clear(PIPE_CLEAR_COLOR, rgba, 0.0, 0)
-
-    # vertex shader
-    vs = Shader(file('vert-' + name + '.sh', 'rt').read())
-    ctx.set_vertex_shader(vs)
-
-    # fragment shader
-    fs = Shader('''
-        FRAG
-        DCL IN[0], COLOR, LINEAR
-        DCL OUT[0], COLOR, CONSTANT
-        0:MOV OUT[0], IN[0]
-        1:END
-    ''')
-    ctx.set_fragment_shader(fs)
-
-    constbuf0 = dev.buffer_create(64,
-                                  (PIPE_BUFFER_USAGE_CONSTANT |
-                                   PIPE_BUFFER_USAGE_GPU_READ |
-                                   PIPE_BUFFER_USAGE_CPU_WRITE),
-                                  4 * 4 * 4)
-
-    cbdata = ''
-    cbdata += struct.pack('4f', 0.4, 0.0, 0.0, 1.0)
-    cbdata += struct.pack('4f', 1.0, 1.0, 1.0, 1.0)
-    cbdata += struct.pack('4f', 2.0, 2.0, 2.0, 2.0)
-    cbdata += struct.pack('4f', 4.0, 8.0, 16.0, 32.0)
-
-    constbuf0.write(cbdata, 0)
-
-    ctx.set_constant_buffer(PIPE_SHADER_VERTEX,
-                            0,
-                            constbuf0)
-
-    constbuf1 = dev.buffer_create(64,
-                                  (PIPE_BUFFER_USAGE_CONSTANT |
-                                   PIPE_BUFFER_USAGE_GPU_READ |
-                                   PIPE_BUFFER_USAGE_CPU_WRITE),
-                                  4 * 4 * 4)
-
-    cbdata = ''
-    cbdata += struct.pack('4f', 0.1, 0.1, 0.1, 0.1)
-    cbdata += struct.pack('4f', 0.25, 0.25, 0.25, 0.25)
-    cbdata += struct.pack('4f', 0.5, 0.5, 0.5, 0.5)
-    cbdata += struct.pack('4f', 0.75, 0.75, 0.75, 0.75)
-
-    constbuf1.write(cbdata, 0)
-
-    ctx.set_constant_buffer(PIPE_SHADER_VERTEX,
-                            1,
-                            constbuf1)
-
-    xy = [
-         0.0,  0.8,
-        -0.2,  0.4,
-         0.2,  0.4,
-        -0.4,  0.0,
-         0.0,  0.0,
-         0.4,  0.0,
-        -0.6, -0.4,
-        -0.2, -0.4,
-         0.2, -0.4,
-         0.6, -0.4,
-        -0.8, -0.8,
-        -0.4, -0.8,
-         0.0, -0.8,
-         0.4, -0.8,
-         0.8, -0.8,
-    ]
-    color = [
-        1.0, 0.0, 0.0,
-        0.0, 1.0, 0.0,
-        0.0, 0.0, 1.0,
-    ]
-    tri = [
-         1,  2,  0,
-         3,  4,  1,
-         4,  2,  1,
-         4,  5,  2,
-         6,  7,  3,
-         7,  4,  3,
-         7,  8,  4,
-         8,  5,  4,
-         8,  9,  5,
-        10, 11,  6,
-        11,  7,  6,
-        11, 12,  7,
-        12,  8,  7,
-        12, 13,  8,
-        13,  9,  8,
-        13, 14,  9,
-    ]
-
-    nverts = 16 * 3
-    nattrs = 2
-    verts = FloatArray(nverts * nattrs * 4)
-
-    for i in range(0, nverts):
-        verts[i * nattrs * 4 + 0] = xy[tri[i] * 2 + 0] # x
-        verts[i * nattrs * 4 + 1] = xy[tri[i] * 2 + 1] # y
-        verts[i * nattrs * 4 + 2] = 0.5 # z
-        verts[i * nattrs * 4 + 3] = 1.0 # w
-        verts[i * nattrs * 4 + 4] = color[(i % 3) * 3 + 0] # r
-        verts[i * nattrs * 4 + 5] = color[(i % 3) * 3 + 1] # g
-        verts[i * nattrs * 4 + 6] = color[(i % 3) * 3 + 2] # b
-        verts[i * nattrs * 4 + 7] = 1.0 # a
-
-    ctx.draw_vertices(PIPE_PRIM_TRIANGLES,
-                      nverts,
-                      nattrs,
-                      verts)
-
-    ctx.flush()
-
-    save_image('vert-' + name + '.png', cbuf)
-
-def main():
-    tests = [
-        'abs',
-        'add',
-        'arl',
-        'arr',
-        'cb-1d',
-        'cb-2d',
-        'dp3',
-        'dp4',
-        'dst',
-        'ex2',
-        'flr',
-        'frc',
-        'lg2',
-        'lit',
-        'lrp',
-        'mad',
-        'max',
-        'min',
-        'mov',
-        'mul',
-        'rcp',
-        'rsq',
-        'sge',
-        'slt',
-        'srcmod-abs',
-        'srcmod-absneg',
-        'srcmod-neg',
-        'srcmod-swz',
-        'sub',
-        'xpd',
-    ]
-
-    dev = Device()
-    for t in tests:
-        test(dev, t)
-
-if __name__ == '__main__':
-    main()
diff --git a/src/gallium/tests/python/tests/texture_render.py b/src/gallium/tests/python/tests/texture_render.py
deleted file mode 100755
index 23f3d2a57de..00000000000
--- a/src/gallium/tests/python/tests/texture_render.py
+++ /dev/null
@@ -1,320 +0,0 @@
-#!/usr/bin/env python
-##########################################################################
-# 
-# Copyright 2009 VMware, Inc.
-# All Rights Reserved.
-# 
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sub license, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-# 
-# The above copyright notice and this permission notice (including the
-# next paragraph) shall be included in all copies or substantial portions
-# of the Software.
-# 
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
-# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
-# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-# 
-##########################################################################
-
-
-from gallium import *
-from base import *
-
-
-def lods(*dims):
-    size = max(dims)
-    lods = 0
-    while size:
-        lods += 1
-        size >>= 1
-    return lods
-
-
-class TextureTest(TestCase):
-    
-    tags = (
-        'target',
-        'format',
-        'width',
-        'height',
-        'depth',
-        'last_level',
-        'face',
-        'level',
-        'zslice',
-    )
-
-    def test(self):
-        dev = self.dev
-        
-        target = self.target
-        format = self.format
-        width = self.width
-        height = self.height
-        depth = self.depth
-        last_level = self.last_level
-        face = self.face
-        level = self.level
-        zslice = self.zslice
-        
-        #  textures
-        dst_texture = dev.resource_create(
-            target = target,
-            format = format, 
-            width = width, 
-            height = height,
-            depth = depth, 
-            last_level = last_level,
-            bind = PIPE_BIND_RENDER_TARGET,
-        )
-        if dst_texture is None:
-            raise TestSkip
-
-        dst_surface = dst_texture.get_surface(face = face, level = level, zslice = zslice)
-        
-        ref_texture = dev.resource_create(
-            target = target,
-            format = format, 
-            width = dst_surface.width, 
-            height = dst_surface.height,
-            depth = 1, 
-            last_level = 0,
-            bind = PIPE_BIND_SAMPLER_VIEW,
-        )
-
-        ref_surface = ref_texture.get_surface()
-        
-        src_texture = dev.resource_create(
-            target = target,
-            format = PIPE_FORMAT_B8G8R8A8_UNORM, 
-            width = dst_surface.width, 
-            height = dst_surface.height,
-            depth = 1, 
-            last_level = 0,
-            bind = PIPE_BIND_SAMPLER_VIEW,
-        )
-
-        src_surface = src_texture.get_surface()
-        
-        expected_rgba = FloatArray(height*width*4) 
-        ref_surface.sample_rgba(expected_rgba)
-
-        src_surface.put_tile_rgba(0, 0, src_surface.width, src_surface.height, expected_rgba)
-        
-        ctx = self.dev.context_create()
-    
-        # disabled blending/masking
-        blend = Blend()
-        blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE
-        blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE
-        blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO
-        blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO
-        blend.rt[0].colormask = PIPE_MASK_RGBA
-        ctx.set_blend(blend)
-    
-        # no-op depth/stencil/alpha
-        depth_stencil_alpha = DepthStencilAlpha()
-        ctx.set_depth_stencil_alpha(depth_stencil_alpha)
-    
-        # rasterizer
-        rasterizer = Rasterizer()
-        rasterizer.front_winding = PIPE_WINDING_CW
-        rasterizer.cull_mode = PIPE_WINDING_NONE
-        rasterizer.bypass_vs_clip_and_viewport = 1
-        ctx.set_rasterizer(rasterizer)
-    
-        # samplers
-        sampler = Sampler()
-        sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-        sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-        sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE
-        sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST
-        sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST
-        sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST
-        sampler.normalized_coords = 1
-        sampler.min_lod = 0
-        sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1
-        ctx.set_fragment_sampler(0, sampler)
-        ctx.set_fragment_sampler_texture(0, src_texture)
-
-        #  framebuffer 
-        cbuf_tex = dev.resource_create(
-            PIPE_FORMAT_B8G8R8A8_UNORM, 
-            width, 
-            height,
-            bind = PIPE_BIND_RENDER_TARGET,
-        )
-
-        fb = Framebuffer()
-        fb.width = dst_surface.width
-        fb.height = dst_surface.height
-        fb.nr_cbufs = 1
-        fb.set_cbuf(0, dst_surface)
-        ctx.set_framebuffer(fb)
-        rgba = FloatArray(4);
-        rgba[0] = 0.0
-        rgba[1] = 0.0
-        rgba[2] = 0.0
-        rgba[3] = 0.0
-        ctx.clear(PIPE_CLEAR_COLOR, rgba, 0.0, 0)
-        del fb
-    
-        # vertex shader
-        vs = Shader('''
-            VERT
-            DCL IN[0], POSITION, CONSTANT
-            DCL IN[1], GENERIC, CONSTANT
-            DCL OUT[0], POSITION, CONSTANT
-            DCL OUT[1], GENERIC, CONSTANT
-            0:MOV OUT[0], IN[0]
-            1:MOV OUT[1], IN[1]
-            2:END
-        ''')
-        #vs.dump()
-        ctx.set_vertex_shader(vs)
-    
-        # fragment shader
-        fs = Shader('''
-            FRAG
-            DCL IN[0], GENERIC[0], LINEAR
-            DCL OUT[0], COLOR, CONSTANT
-            DCL SAMP[0], CONSTANT
-            0:TEX OUT[0], IN[0], SAMP[0], 2D
-            1:END
-        ''')
-        #fs.dump()
-        ctx.set_fragment_shader(fs)
-
-        nverts = 4
-        nattrs = 2
-        verts = FloatArray(nverts * nattrs * 4)
-    
-        x = 0
-        y = 0
-        w = dst_surface.width
-        h = dst_surface.height
-    
-        pos = [
-            [x, y],
-            [x+w, y],
-            [x+w, y+h],
-            [x, y+h],
-        ]
-    
-        tex = [
-            [0.0, 0.0], 
-            [1.0, 0.0], 
-            [1.0, 1.0], 
-            [0.0, 1.0],
-        ]
-    
-        for i in range(0, 4):
-            j = 8*i
-            verts[j + 0] = pos[i][0] # x
-            verts[j + 1] = pos[i][1] # y
-            verts[j + 2] = 0.0 # z
-            verts[j + 3] = 1.0 # w
-            verts[j + 4] = tex[i][0] # s
-            verts[j + 5] = tex[i][1] # r
-            verts[j + 6] = 0.0
-            verts[j + 7] = 1.0
-    
-        ctx.draw_vertices(PIPE_PRIM_TRIANGLE_FAN,
-                          nverts, 
-                          nattrs, 
-                          verts)
-    
-        ctx.flush()
-    
-        self.assert_rgba(dst_surface, x, y, w, h, expected_rgba, 4.0/256, 0.85)
-        
-
-
-def main():
-    dev = Device()
-    suite = TestSuite()
-    
-    targets = [
-        PIPE_TEXTURE_2D,
-        PIPE_TEXTURE_CUBE,
-        #PIPE_TEXTURE_3D,
-    ]
-    
-    formats = [
-        PIPE_FORMAT_B8G8R8A8_UNORM,
-        PIPE_FORMAT_B8G8R8X8_UNORM,
-        #PIPE_FORMAT_B8G8R8A8_SRGB,
-        PIPE_FORMAT_B5G6R5_UNORM,
-        PIPE_FORMAT_B5G5R5A1_UNORM,
-        PIPE_FORMAT_B4G4R4A4_UNORM,
-        #PIPE_FORMAT_Z32_UNORM,
-        #PIPE_FORMAT_S8_USCALED_Z24_UNORM,
-        #PIPE_FORMAT_X8Z24_UNORM,
-        #PIPE_FORMAT_Z16_UNORM,
-        #PIPE_FORMAT_S8_USCALED,
-        PIPE_FORMAT_A8_UNORM,
-        PIPE_FORMAT_L8_UNORM,
-        #PIPE_FORMAT_DXT1_RGB,
-        #PIPE_FORMAT_DXT1_RGBA,
-        #PIPE_FORMAT_DXT3_RGBA,
-        #PIPE_FORMAT_DXT5_RGBA,
-    ]
-    
-    sizes = [64, 32, 16, 8, 4, 2, 1]
-    #sizes = [1020, 508, 252, 62, 30, 14, 6, 3]
-    #sizes = [64]
-    #sizes = [63]
-    
-    faces = [
-        PIPE_TEX_FACE_POS_X,
-        PIPE_TEX_FACE_NEG_X,
-        PIPE_TEX_FACE_POS_Y,
-        PIPE_TEX_FACE_NEG_Y, 
-        PIPE_TEX_FACE_POS_Z, 
-        PIPE_TEX_FACE_NEG_Z,
-    ]
-
-    for target in targets:
-        for format in formats:
-            for size in sizes:
-                if target == PIPE_TEXTURE_3D:
-                    depth = size
-                else:
-                    depth = 1
-                for face in faces:
-                    if target != PIPE_TEXTURE_CUBE and face:
-                        continue
-                    levels = lods(size)
-                    for last_level in range(levels):
-                        for level in range(0, last_level + 1):
-                            zslice = 0
-                            while zslice < depth >> level:
-                                test = TextureTest(
-                                    dev = dev,
-                                    target = target,
-                                    format = format, 
-                                    width = size,
-                                    height = size,
-                                    depth = depth,
-                                    last_level = last_level,
-                                    face = face,
-                                    level = level,
-                                    zslice = zslice,
-                                )
-                                suite.add_test(test)
-                                zslice = (zslice + 1)*2 - 1
-    suite.run()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/src/gallium/tests/python/tests/tree.py b/src/gallium/tests/python/tests/tree.py
deleted file mode 100755
index 0c1bcda4cf2..00000000000
--- a/src/gallium/tests/python/tests/tree.py
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env python
-# 
-# See also:
-#  http://www.ailab.si/orange/doc/ofb/c_otherclass.htm
-
-import os.path
-import sys
-
-import orange
-import orngTree
-
-for arg in sys.argv[1:]:
-    name, ext = os.path.splitext(arg)
-
-    data = orange.ExampleTable(arg)
-
-    tree = orngTree.TreeLearner(data, sameMajorityPruning=1, mForPruning=2)
-
-    orngTree.printTxt(tree)
-
-    file(name+'.txt', 'wt').write(orngTree.dumpTree(tree) + '\n')
-
-    orngTree.printDot(tree, fileName=name+'.dot', nodeShape='ellipse', leafShape='box')
diff --git a/src/gallium/tests/trivial/.gitignore b/src/gallium/tests/trivial/.gitignore
deleted file mode 100644
index af6cdedbeba..00000000000
--- a/src/gallium/tests/trivial/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-tri
-quad-tex
-result.bmp
diff --git a/src/gallium/tests/trivial/Makefile b/src/gallium/tests/trivial/Makefile
deleted file mode 100644
index bfcbdd9712d..00000000000
--- a/src/gallium/tests/trivial/Makefile
+++ /dev/null
@@ -1,44 +0,0 @@
-# progs/gallium/simple/Makefile
-
-TOP = ../../../..
-include $(TOP)/configs/current
-
-INCLUDES = \
-	-I. \
-	-I$(TOP)/src/gallium/include \
-	-I$(TOP)/src/gallium/auxiliary \
-	-I$(TOP)/src/gallium/drivers \
-	-I$(TOP)/src/gallium/winsys \
-	$(PROG_INCLUDES)
-
-LINKS = \
-	$(TOP)/src/gallium/drivers/trace/libtrace.a \
-	$(TOP)/src/gallium/winsys/sw/null/libws_null.a \
-	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
-	$(GALLIUM_AUXILIARIES) \
-	$(PROG_LINKS)
-
-SOURCES = \
-	tri.c \
-	quad-tex.c
-
-OBJECTS = $(SOURCES:.c=.o)
-
-PROGS = $(OBJECTS:.o=)
-
-##### TARGETS #####
-
-default: $(PROGS)
-
-clean:
-	-rm -f $(PROGS)
-	-rm -f *.o
-	-rm -f result.bmp
-
-##### RULES #####
-
-$(OBJECTS): %.o: %.c
-	$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $(PROG_DEFINES) $< -o $@
-
-$(PROGS): %: %.o $(LINKS)
-	$(CC) $(LDFLAGS) $< $(LINKS) -lm -lpthread -ldl -o $@
diff --git a/src/gallium/tests/unit/Makefile b/src/gallium/tests/unit/Makefile
deleted file mode 100644
index f65958dadd5..00000000000
--- a/src/gallium/tests/unit/Makefile
+++ /dev/null
@@ -1,47 +0,0 @@
-# progs/gallium/simple/Makefile
-
-TOP = ../../../..
-include $(TOP)/configs/current
-
-INCLUDES = \
-	-I. \
-	-I$(TOP)/src/gallium/include \
-	-I$(TOP)/src/gallium/auxiliary \
-	-I$(TOP)/src/gallium/drivers \
-	-I$(TOP)/src/gallium/winsys \
-	$(PROG_INCLUDES)
-
-LINKS = \
-	$(TOP)/src/gallium/drivers/trace/libtrace.a \
-	$(TOP)/src/gallium/winsys/sw/null/libws_null.a \
-	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
-	$(GALLIUM_AUXILIARIES) \
-	$(PROG_LINKS)
-
-SOURCES = \
-	pipe_barrier_test.c \
-	u_cache_test.c \
-	u_half_test.c \
-	u_format_test.c
-
-
-OBJECTS = $(SOURCES:.c=.o)
-
-PROGS = $(OBJECTS:.o=)
-
-##### TARGETS #####
-
-default: $(PROGS)
-
-clean:
-	-rm -f $(PROGS)
-	-rm -f *.o
-	-rm -f result.bmp
-
-##### RULES #####
-
-$(OBJECTS): %.o: %.c
-	$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $(PROG_DEFINES) $< -o $@
-
-$(PROGS): %: %.o
-	$(CC) $(LDFLAGS) $< $(LINKS) -lm -lpthread -ldl -o $@
diff --git a/src/gallium/tests/unit/SConscript b/src/gallium/tests/unit/SConscript
deleted file mode 100644
index 8a9f3504c75..00000000000
--- a/src/gallium/tests/unit/SConscript
+++ /dev/null
@@ -1,25 +0,0 @@
-Import('*')
-
-env = env.Clone()
-
-env.Prepend(LIBS = [gallium])
-
-progs = [
-    'pipe_barrier_test',
-    'u_cache_test',
-    'u_format_test',
-    'u_half_test'
-]
-
-for prog in progs:
-    prog = env.Program(
-        target = prog,
-        source = prog + '.c',
-    )
-    
-    env.InstallProgram(prog)
-
-    # http://www.scons.org/wiki/UnitTests
-    test_alias = env.Alias('unit', [prog], prog[0].abspath)
-    AlwaysBuild(test_alias)
-
diff --git a/src/gallium/tests/unit/pipe_barrier_test.c b/src/gallium/tests/unit/pipe_barrier_test.c
deleted file mode 100644
index f5d72b0abae..00000000000
--- a/src/gallium/tests/unit/pipe_barrier_test.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009-2010 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/*
- *  Test case for pipe_barrier.
- *
- *  The test succeeds if no thread exits before all the other threads reach
- *  the barrier.
- */
-
-
-#include <stdio.h>
-
-#include "os/os_thread.h"
-#include "os/os_time.h"
-
-
-#define NUM_THREADS 10
-
-static pipe_thread threads[NUM_THREADS];
-static pipe_barrier barrier;
-static int thread_ids[NUM_THREADS];
-
-
-static PIPE_THREAD_ROUTINE(thread_function, thread_data)
-{
-   int thread_id = *((int *) thread_data);
-
-   printf("thread %d starting\n", thread_id);
-   os_time_sleep(thread_id * 1000 * 1000);
-   printf("thread %d before barrier\n", thread_id);
-   pipe_barrier_wait(&barrier);
-   printf("thread %d exiting\n", thread_id);
-
-   return NULL;
-}
-
-
-int main()
-{
-   int i;
-
-   printf("pipe_barrier_test starting\n");
-
-   pipe_barrier_init(&barrier, NUM_THREADS);
-
-   for (i = 0; i < NUM_THREADS; i++) {
-      thread_ids[i] = i;
-      threads[i] = pipe_thread_create(thread_function, (void *) &thread_ids[i]);
-   }
-
-   for (i = 0; i < NUM_THREADS; i++ ) {
-      pipe_thread_wait(threads[i]);
-   }
-
-   pipe_barrier_destroy(&barrier);
-
-   printf("pipe_barrier_test exiting\n");
-
-   return 0;
-}
diff --git a/src/gallium/tests/unit/u_cache_test.c b/src/gallium/tests/unit/u_cache_test.c
deleted file mode 100644
index 0b62a765230..00000000000
--- a/src/gallium/tests/unit/u_cache_test.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/*
- * Test case for u_cache.
- */
-
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "util/u_cache.h"
-#include "util/u_hash.h"
-
-
-typedef uint32_t cache_test_key;
-typedef uint32_t cache_test_value;
-
-
-static uint32_t
-cache_test_hash(const void *key)
-{
-   return util_hash_crc32(key, sizeof(cache_test_key));
-}
-
-
-static void
-cache_test_destroy(void *key, void *value)
-{
-   free(key);
-   free(value);
-}
-
-
-static int
-cache_test_compare(const void *key1, const void *key2) {
-   return !(key1 == key2);
-}
-
-
-int main() {
-   unsigned cache_size;
-   unsigned cache_count;
-
-   for (cache_size = 2; cache_size < (1 << 15); cache_size *= 2) {
-      for (cache_count = (cache_size << 5); cache_count < (cache_size << 10); cache_count *= 2) {
-         struct util_cache * cache;
-         cache_test_key *key;
-         cache_test_value *value_in;
-         cache_test_value *value_out;
-         int i;
-
-         printf("Testing cache size of %d with %d values.\n", cache_size, cache_count);
-
-         cache = util_cache_create(cache_test_hash,
-                                   cache_test_compare,
-                                   cache_test_destroy,
-                                   cache_size);
-
-         /*
-          * Retrieve a value from an empty cache.
-          */
-         key = malloc(sizeof(cache_test_key));
-         *key = 0xdeadbeef;
-         value_out = (cache_test_value *) util_cache_get(cache, key);
-         assert(value_out == NULL);
-         free(key);
-
-
-         /*
-          * Repeatedly insert into and retrieve values from the cache.
-          */
-         for (i = 0; i < cache_count; i++) {
-            key = malloc(sizeof(cache_test_key));
-            value_in = malloc(sizeof(cache_test_value));
-
-            *key = rand();
-            *value_in = rand();
-            util_cache_set(cache, key, value_in);
-
-            value_out = util_cache_get(cache, key);
-            assert(value_out != NULL);
-            assert(value_in == value_out);
-            assert(*value_in == *value_out);
-         }
-
-         /*
-          * In debug builds, this will trigger a self-check by the cache of
-          * the distribution of hits in its internal cache entries.
-          */
-         util_cache_destroy(cache);
-      }
-   }
-
-   return 0;
-}
diff --git a/src/gallium/tests/unit/u_format_test.c b/src/gallium/tests/unit/u_format_test.c
deleted file mode 100644
index cfde6af75e0..00000000000
--- a/src/gallium/tests/unit/u_format_test.c
+++ /dev/null
@@ -1,708 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009-2010 VMware, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <float.h>
-
-#include "util/u_half.h"
-#include "util/u_format.h"
-#include "util/u_format_tests.h"
-#include "util/u_format_s3tc.h"
-
-
-static boolean
-compare_float(float x, float y)
-{
-   float error = y - x;
-
-   if (error < 0.0f)
-      error = -error;
-
-   if (error > FLT_EPSILON) {
-      return FALSE;
-   }
-
-   return TRUE;
-}
-
-
-static void
-print_packed(const struct util_format_description *format_desc,
-             const char *prefix,
-             const uint8_t *packed,
-             const char *suffix)
-{
-   unsigned i;
-   const char *sep = "";
-
-   printf("%s", prefix);
-   for (i = 0; i < format_desc->block.bits/8; ++i) {
-      printf("%s%02x", sep, packed[i]);
-      sep = " ";
-   }
-   printf("%s", suffix);
-}
-
-
-static void
-print_unpacked_rgba_doubl(const struct util_format_description *format_desc,
-                     const char *prefix,
-                     const double unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4],
-                     const char *suffix)
-{
-   unsigned i, j;
-   const char *sep = "";
-
-   printf("%s", prefix);
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         printf("%s{%f, %f, %f, %f}", sep, unpacked[i][j][0], unpacked[i][j][1], unpacked[i][j][2], unpacked[i][j][3]);
-         sep = ", ";
-      }
-      sep = ",\n";
-   }
-   printf("%s", suffix);
-}
-
-
-static void
-print_unpacked_rgba_float(const struct util_format_description *format_desc,
-                     const char *prefix,
-                     float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4],
-                     const char *suffix)
-{
-   unsigned i, j;
-   const char *sep = "";
-
-   printf("%s", prefix);
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         printf("%s{%f, %f, %f, %f}", sep, unpacked[i][j][0], unpacked[i][j][1], unpacked[i][j][2], unpacked[i][j][3]);
-         sep = ", ";
-      }
-      sep = ",\n";
-   }
-   printf("%s", suffix);
-}
-
-
-static void
-print_unpacked_rgba_8unorm(const struct util_format_description *format_desc,
-                      const char *prefix,
-                      uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4],
-                      const char *suffix)
-{
-   unsigned i, j;
-   const char *sep = "";
-
-   printf("%s", prefix);
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         printf("%s{0x%02x, 0x%02x, 0x%02x, 0x%02x}", sep, unpacked[i][j][0], unpacked[i][j][1], unpacked[i][j][2], unpacked[i][j][3]);
-         sep = ", ";
-      }
-   }
-   printf("%s", suffix);
-}
-
-
-static void
-print_unpacked_z_float(const struct util_format_description *format_desc,
-                       const char *prefix,
-                       float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH],
-                       const char *suffix)
-{
-   unsigned i, j;
-   const char *sep = "";
-
-   printf("%s", prefix);
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         printf("%s%f", sep, unpacked[i][j]);
-         sep = ", ";
-      }
-      sep = ",\n";
-   }
-   printf("%s", suffix);
-}
-
-
-static void
-print_unpacked_z_32unorm(const struct util_format_description *format_desc,
-                         const char *prefix,
-                         uint32_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH],
-                         const char *suffix)
-{
-   unsigned i, j;
-   const char *sep = "";
-
-   printf("%s", prefix);
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         printf("%s0x%08x", sep, unpacked[i][j]);
-         sep = ", ";
-      }
-   }
-   printf("%s", suffix);
-}
-
-
-static void
-print_unpacked_s_8uscaled(const struct util_format_description *format_desc,
-                          const char *prefix,
-                          uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH],
-                          const char *suffix)
-{
-   unsigned i, j;
-   const char *sep = "";
-
-   printf("%s", prefix);
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         printf("%s0x%02x", sep, unpacked[i][j]);
-         sep = ", ";
-      }
-   }
-   printf("%s", suffix);
-}
-
-
-static boolean
-test_format_fetch_rgba_float(const struct util_format_description *format_desc,
-                             const struct util_format_test_case *test)
-{
-   float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } };
-   unsigned i, j, k;
-   boolean success;
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         format_desc->fetch_rgba_float(unpacked[i][j], test->packed, j, i);
-         for (k = 0; k < 4; ++k) {
-            if (!compare_float(test->unpacked[i][j][k], unpacked[i][j][k])) {
-               success = FALSE;
-            }
-         }
-      }
-   }
-
-   if (!success) {
-      print_unpacked_rgba_float(format_desc, "FAILED: ", unpacked, " obtained\n");
-      print_unpacked_rgba_doubl(format_desc, "        ", test->unpacked, " expected\n");
-   }
-
-   return success;
-}
-
-
-static boolean
-test_format_unpack_rgba_float(const struct util_format_description *format_desc,
-                              const struct util_format_test_case *test)
-{
-   float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } };
-   unsigned i, j, k;
-   boolean success;
-
-   format_desc->unpack_rgba_float(&unpacked[0][0][0], sizeof unpacked[0],
-                             test->packed, 0,
-                             format_desc->block.width, format_desc->block.height);
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         for (k = 0; k < 4; ++k) {
-            if (!compare_float(test->unpacked[i][j][k], unpacked[i][j][k])) {
-               success = FALSE;
-            }
-         }
-      }
-   }
-
-   if (!success) {
-      print_unpacked_rgba_float(format_desc, "FAILED: ", unpacked, " obtained\n");
-      print_unpacked_rgba_doubl(format_desc, "        ", test->unpacked, " expected\n");
-   }
-
-   return success;
-}
-
-
-static boolean
-test_format_pack_rgba_float(const struct util_format_description *format_desc,
-                            const struct util_format_test_case *test)
-{
-   float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4];
-   uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES];
-   unsigned i, j, k;
-   boolean success;
-
-   if (test->format == PIPE_FORMAT_DXT1_RGBA) {
-      /*
-       * Skip S3TC as packed representation is not canonical.
-       *
-       * TODO: Do a round trip conversion.
-       */
-      return TRUE;
-   }
-
-   memset(packed, 0, sizeof packed);
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         for (k = 0; k < 4; ++k) {
-            unpacked[i][j][k] = (float) test->unpacked[i][j][k];
-         }
-      }
-   }
-
-   format_desc->pack_rgba_float(packed, 0,
-                           &unpacked[0][0][0], sizeof unpacked[0],
-                           format_desc->block.width, format_desc->block.height);
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.bits/8; ++i)
-      if ((test->packed[i] & test->mask[i]) != (packed[i] & test->mask[i]))
-         success = FALSE;
-
-   if (!success) {
-      print_packed(format_desc, "FAILED: ", packed, " obtained\n");
-      print_packed(format_desc, "        ", test->packed, " expected\n");
-   }
-
-   return success;
-}
-
-
-static boolean
-convert_float_to_8unorm(uint8_t *dst, const double *src)
-{
-   unsigned i;
-   boolean accurate = TRUE;
-
-   for (i = 0; i < UTIL_FORMAT_MAX_UNPACKED_HEIGHT*UTIL_FORMAT_MAX_UNPACKED_WIDTH*4; ++i) {
-      if (src[i] < 0.0) {
-         accurate = FALSE;
-         dst[i] = 0;
-      }
-      else if (src[i] > 1.0) {
-         accurate = FALSE;
-         dst[i] = 255;
-      }
-      else {
-         dst[i] = src[i] * 255.0;
-      }
-   }
-
-   return accurate;
-}
-
-
-static boolean
-test_format_unpack_rgba_8unorm(const struct util_format_description *format_desc,
-                               const struct util_format_test_case *test)
-{
-   uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } };
-   uint8_t expected[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4] = { { { 0 } } };
-   unsigned i, j, k;
-   boolean success;
-
-   format_desc->unpack_rgba_8unorm(&unpacked[0][0][0], sizeof unpacked[0],
-                              test->packed, 0,
-                              format_desc->block.width, format_desc->block.height);
-
-   convert_float_to_8unorm(&expected[0][0][0], &test->unpacked[0][0][0]);
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         for (k = 0; k < 4; ++k) {
-            if (expected[i][j][k] != unpacked[i][j][k]) {
-               success = FALSE;
-            }
-         }
-      }
-   }
-
-   if (!success) {
-      print_unpacked_rgba_8unorm(format_desc, "FAILED: ", unpacked, " obtained\n");
-      print_unpacked_rgba_8unorm(format_desc, "        ", expected, " expected\n");
-   }
-
-   return success;
-}
-
-
-static boolean
-test_format_pack_rgba_8unorm(const struct util_format_description *format_desc,
-                             const struct util_format_test_case *test)
-{
-   uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH][4];
-   uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES];
-   unsigned i;
-   boolean success;
-
-   if (test->format == PIPE_FORMAT_DXT1_RGBA) {
-      /*
-       * Skip S3TC as packed representation is not canonical.
-       *
-       * TODO: Do a round trip conversion.
-       */
-      return TRUE;
-   }
-
-   if (!convert_float_to_8unorm(&unpacked[0][0][0], &test->unpacked[0][0][0])) {
-      /*
-       * Skip test cases which cannot be represented by four unorm bytes.
-       */
-      return TRUE;
-   }
-
-   memset(packed, 0, sizeof packed);
-
-   format_desc->pack_rgba_8unorm(packed, 0,
-                            &unpacked[0][0][0], sizeof unpacked[0],
-                            format_desc->block.width, format_desc->block.height);
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.bits/8; ++i)
-      if ((test->packed[i] & test->mask[i]) != (packed[i] & test->mask[i]))
-         success = FALSE;
-
-   if (!success) {
-      print_packed(format_desc, "FAILED: ", packed, " obtained\n");
-      print_packed(format_desc, "        ", test->packed, " expected\n");
-   }
-
-   return success;
-}
-
-
-static boolean
-test_format_unpack_z_float(const struct util_format_description *format_desc,
-                              const struct util_format_test_case *test)
-{
-   float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH] = { { 0 } };
-   unsigned i, j;
-   boolean success;
-
-   format_desc->unpack_z_float(&unpacked[0][0], sizeof unpacked[0],
-                               test->packed, 0,
-                               format_desc->block.width, format_desc->block.height);
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         if (!compare_float(test->unpacked[i][j][0], unpacked[i][j])) {
-            success = FALSE;
-         }
-      }
-   }
-
-   if (!success) {
-      print_unpacked_z_float(format_desc, "FAILED: ", unpacked, " obtained\n");
-      print_unpacked_rgba_doubl(format_desc, "        ", test->unpacked, " expected\n");
-   }
-
-   return success;
-}
-
-
-static boolean
-test_format_pack_z_float(const struct util_format_description *format_desc,
-                            const struct util_format_test_case *test)
-{
-   float unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH];
-   uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES];
-   unsigned i, j;
-   boolean success;
-
-   memset(packed, 0, sizeof packed);
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         unpacked[i][j] = (float) test->unpacked[i][j][0];
-         if (test->unpacked[i][j][1]) {
-            return TRUE;
-         }
-      }
-   }
-
-   format_desc->pack_z_float(packed, 0,
-                             &unpacked[0][0], sizeof unpacked[0],
-                             format_desc->block.width, format_desc->block.height);
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.bits/8; ++i)
-      if ((test->packed[i] & test->mask[i]) != (packed[i] & test->mask[i]))
-         success = FALSE;
-
-   if (!success) {
-      print_packed(format_desc, "FAILED: ", packed, " obtained\n");
-      print_packed(format_desc, "        ", test->packed, " expected\n");
-   }
-
-   return success;
-}
-
-
-static boolean
-test_format_unpack_z_32unorm(const struct util_format_description *format_desc,
-                               const struct util_format_test_case *test)
-{
-   uint32_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH] = { { 0 } };
-   uint32_t expected[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH] = { { 0 } };
-   unsigned i, j;
-   boolean success;
-
-   format_desc->unpack_z_32unorm(&unpacked[0][0], sizeof unpacked[0],
-                                 test->packed, 0,
-                                 format_desc->block.width, format_desc->block.height);
-
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         expected[i][j] = test->unpacked[i][j][0] * 0xffffffff;
-      }
-   }
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         if (expected[i][j] != unpacked[i][j]) {
-            success = FALSE;
-         }
-      }
-   }
-
-   if (!success) {
-      print_unpacked_z_32unorm(format_desc, "FAILED: ", unpacked, " obtained\n");
-      print_unpacked_z_32unorm(format_desc, "        ", expected, " expected\n");
-   }
-
-   return success;
-}
-
-
-static boolean
-test_format_pack_z_32unorm(const struct util_format_description *format_desc,
-                             const struct util_format_test_case *test)
-{
-   uint32_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH];
-   uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES];
-   unsigned i, j;
-   boolean success;
-
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         unpacked[i][j] = test->unpacked[i][j][0] * 0xffffffff;
-         if (test->unpacked[i][j][1]) {
-            return TRUE;
-         }
-      }
-   }
-
-   memset(packed, 0, sizeof packed);
-
-   format_desc->pack_z_32unorm(packed, 0,
-                               &unpacked[0][0], sizeof unpacked[0],
-                               format_desc->block.width, format_desc->block.height);
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.bits/8; ++i)
-      if ((test->packed[i] & test->mask[i]) != (packed[i] & test->mask[i]))
-         success = FALSE;
-
-   if (!success) {
-      print_packed(format_desc, "FAILED: ", packed, " obtained\n");
-      print_packed(format_desc, "        ", test->packed, " expected\n");
-   }
-
-   return success;
-}
-
-
-static boolean
-test_format_unpack_s_8uscaled(const struct util_format_description *format_desc,
-                               const struct util_format_test_case *test)
-{
-   uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH] = { { 0 } };
-   uint8_t expected[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH] = { { 0 } };
-   unsigned i, j;
-   boolean success;
-
-   format_desc->unpack_s_8uscaled(&unpacked[0][0], sizeof unpacked[0],
-                                  test->packed, 0,
-                                  format_desc->block.width, format_desc->block.height);
-
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         expected[i][j] = test->unpacked[i][j][1];
-      }
-   }
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         if (expected[i][j] != unpacked[i][j]) {
-            success = FALSE;
-         }
-      }
-   }
-
-   if (!success) {
-      print_unpacked_s_8uscaled(format_desc, "FAILED: ", unpacked, " obtained\n");
-      print_unpacked_s_8uscaled(format_desc, "        ", expected, " expected\n");
-   }
-
-   return success;
-}
-
-
-static boolean
-test_format_pack_s_8uscaled(const struct util_format_description *format_desc,
-                             const struct util_format_test_case *test)
-{
-   uint8_t unpacked[UTIL_FORMAT_MAX_UNPACKED_HEIGHT][UTIL_FORMAT_MAX_UNPACKED_WIDTH];
-   uint8_t packed[UTIL_FORMAT_MAX_PACKED_BYTES];
-   unsigned i, j;
-   boolean success;
-
-   for (i = 0; i < format_desc->block.height; ++i) {
-      for (j = 0; j < format_desc->block.width; ++j) {
-         unpacked[i][j] = test->unpacked[i][j][1];
-         if (test->unpacked[i][j][0]) {
-            return TRUE;
-         }
-      }
-   }
-
-   memset(packed, 0, sizeof packed);
-
-   format_desc->pack_s_8uscaled(packed, 0,
-                                &unpacked[0][0], sizeof unpacked[0],
-                                format_desc->block.width, format_desc->block.height);
-
-   success = TRUE;
-   for (i = 0; i < format_desc->block.bits/8; ++i)
-      if ((test->packed[i] & test->mask[i]) != (packed[i] & test->mask[i]))
-         success = FALSE;
-
-   if (!success) {
-      print_packed(format_desc, "FAILED: ", packed, " obtained\n");
-      print_packed(format_desc, "        ", test->packed, " expected\n");
-   }
-
-   return success;
-}
-
-
-typedef boolean
-(*test_func_t)(const struct util_format_description *format_desc,
-               const struct util_format_test_case *test);
-
-
-static boolean
-test_one_func(const struct util_format_description *format_desc,
-              test_func_t func,
-              const char *suffix)
-{
-   unsigned i;
-   bool success = TRUE;
-
-   printf("Testing util_format_%s_%s ...\n",
-          format_desc->short_name, suffix);
-
-   for (i = 0; i < util_format_nr_test_cases; ++i) {
-      const struct util_format_test_case *test = &util_format_test_cases[i];
-
-      if (test->format == format_desc->format) {
-         if (!func(format_desc, &util_format_test_cases[i])) {
-           success = FALSE;
-         }
-      }
-   }
-
-   return success;
-}
-
-
-static boolean
-test_all(void)
-{
-   enum pipe_format format;
-   bool success = TRUE;
-
-   for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
-      const struct util_format_description *format_desc;
-
-      format_desc = util_format_description(format);
-      if (!format_desc) {
-         continue;
-      }
-
-      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
-          !util_format_s3tc_enabled) {
-         continue;
-      }
-
-#     define TEST_ONE_FUNC(name) \
-      if (format_desc->name) { \
-         if (!test_one_func(format_desc, &test_format_##name, #name)) { \
-           success = FALSE; \
-         } \
-      }
-
-      TEST_ONE_FUNC(fetch_rgba_float);
-      TEST_ONE_FUNC(pack_rgba_float);
-      TEST_ONE_FUNC(unpack_rgba_float);
-      TEST_ONE_FUNC(pack_rgba_8unorm);
-      TEST_ONE_FUNC(unpack_rgba_8unorm);
-
-      TEST_ONE_FUNC(unpack_z_32unorm);
-      TEST_ONE_FUNC(pack_z_32unorm);
-      TEST_ONE_FUNC(unpack_z_float);
-      TEST_ONE_FUNC(pack_z_float);
-      TEST_ONE_FUNC(unpack_s_8uscaled);
-      TEST_ONE_FUNC(pack_s_8uscaled);
-
-#     undef TEST_ONE_FUNC
-   }
-
-   return success;
-}
-
-
-int main(int argc, char **argv)
-{
-   boolean success;
-
-   util_format_s3tc_init();
-
-   success = test_all();
-
-   return success ? 0 : 1;
-}
diff --git a/src/gallium/tests/unit/u_half_test.c b/src/gallium/tests/unit/u_half_test.c
deleted file mode 100644
index 00bda7f50a6..00000000000
--- a/src/gallium/tests/unit/u_half_test.c
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <float.h>
-
-#include "util/u_math.h"
-#include "util/u_half.h"
-
-int
-main(int argc, char **argv)
-{
-   unsigned i;
-   unsigned roundtrip_fails = 0;
-   for(i = 0; i < 1 << 16; ++i)
-   {
-      uint16_t h = (uint16_t) i;
-      union fi f;
-      uint16_t rh;
-      f.ui = util_half_to_floatui(h);
-      rh = util_floatui_to_half(f.ui);
-      if(h != rh)
-      {
-	 printf("Roundtrip failed: %x -> %x = %f -> %x\n", h, f.ui, f.f, rh);
-	 ++roundtrip_fails;
-      }
-   }
-
-   if(roundtrip_fails)
-      printf("Failure! %u/65536 half floats failed a conversion to float and back.\n", roundtrip_fails);
-   else
-      printf("Success!\n");
-   return 0;
-}
-- 
cgit v1.2.3


From 06a49b18729890417094aa9602c1cc1ea8b970e2 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Wed, 14 Jul 2010 00:51:18 +0200
Subject: fixed compilation

---
 src/gallium/state_trackers/vdpau/surface.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index 1f481098ede..2de2ee222c1 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -94,6 +94,7 @@ vlVdpVideoSurfaceCreate(VdpDevice device,
 
 no_handle:
    FREE(p_surf->psurface);
+inv_device:
 no_surf:
    FREE(p_surf);
 no_res:
-- 
cgit v1.2.3


From c97ccc33531d4bf3f3154515317255645ada2afe Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Sun, 18 Jul 2010 23:42:49 +0200
Subject: Added decode.c

---
 src/gallium/state_trackers/vdpau/decode.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 src/gallium/state_trackers/vdpau/decode.c

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
new file mode 100644
index 00000000000..b85185d0ad0
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -0,0 +1,29 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vdpau_private.h"
+
-- 
cgit v1.2.3


From 725e4ada3062c80623abf51477dfdc73fe294f3f Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@io.dk>
Date: Tue, 20 Jul 2010 14:25:28 +0200
Subject: Made some decoding function for mpeg2-decoding

---
 src/gallium/auxiliary/vl/vl_compositor.c         |   4 +-
 src/gallium/include/pipe/p_defines.h             |   1 +
 src/gallium/include/pipe/p_video_state.h         |   1 +
 src/gallium/state_trackers/vdpau/Makefile        |   3 +-
 src/gallium/state_trackers/vdpau/decode.c        | 185 +++++++++++++++++++++++
 src/gallium/state_trackers/vdpau/device.c        |   2 +-
 src/gallium/state_trackers/vdpau/query.c         |  63 ++++++--
 src/gallium/state_trackers/vdpau/surface.c       |  89 +++++++----
 src/gallium/state_trackers/vdpau/vdpau_private.h |  45 +++++-
 9 files changed, 345 insertions(+), 48 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 0640b1a4565..415dc92555f 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -627,8 +627,8 @@ void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float
       pipe_buffer_map(compositor->pipe, compositor->fs_const_buf,
                       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
                       &buf_transfer),
-      mat,
-      sizeof(struct fragment_shader_consts)
+		mat,
+		sizeof(struct fragment_shader_consts)
    );
 
    pipe_buffer_unmap(compositor->pipe, compositor->fs_const_buf,
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 28318183183..26ba7b8002a 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -474,6 +474,7 @@ enum pipe_video_codec
 
 enum pipe_video_profile
 {
+   PIPE_VIDEO_PROFILE_UNKNOWN,
    PIPE_VIDEO_PROFILE_MPEG1,
    PIPE_VIDEO_PROFILE_MPEG2_SIMPLE,
    PIPE_VIDEO_PROFILE_MPEG2_MAIN,
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 5eb96352139..1450f3488f9 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -74,6 +74,7 @@ enum pipe_mpeg12_dct_type
    PIPE_MPEG12_DCT_TYPE_FRAME
 };
 
+
 struct pipe_macroblock
 {
    enum pipe_video_codec codec;
diff --git a/src/gallium/state_trackers/vdpau/Makefile b/src/gallium/state_trackers/vdpau/Makefile
index 53378a9c1ff..a1b83abc6dd 100644
--- a/src/gallium/state_trackers/vdpau/Makefile
+++ b/src/gallium/state_trackers/vdpau/Makefile
@@ -15,7 +15,8 @@ C_SOURCES = htab.c \
 	    ftab.c \
 	    device.c \
 	    query.c \
-	    surface.c
+	    surface.c \
+	    decode.c
 
 
 include ../../Makefile.template
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index b85185d0ad0..8daf7a47f97 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -26,4 +26,189 @@
  **************************************************************************/
 
 #include "vdpau_private.h"
+#include <util/u_memory.h>
+#include <pipe/p_video_context.h>
 
+VdpStatus
+vlVdpDecoderCreate ( 	VdpDevice device, 
+						VdpDecoderProfile profile, 
+						uint32_t width, uint32_t height, 
+						uint32_t max_references, 
+						VdpDecoder *decoder 
+)
+{
+	enum pipe_video_profile p_profile;
+	VdpStatus ret;
+	vlVdpDecoder *vldecoder;
+	
+	if (!decoder)
+		return VDP_STATUS_INVALID_POINTER;
+	
+	if (!(width && height))
+		return VDP_STATUS_INVALID_VALUE;
+		
+   vlVdpDevice *dev = vlGetDataHTAB(device);
+   if (!dev)  {
+      ret = VDP_STATUS_INVALID_HANDLE;
+      goto inv_device;
+   }
+   
+   vldecoder = CALLOC(1,sizeof(vlVdpDecoder));
+   if (!vldecoder)   {
+	   ret = VDP_STATUS_RESOURCES;
+	   goto no_decoder;
+   }
+	
+   vldecoder->vlscreen = vl_screen_create(dev->display, dev->screen);
+   if (!vldecoder->vlscreen)  
+      ret = VDP_STATUS_RESOURCES;
+	  goto no_screen;
+   
+   
+   p_profile = ProfileToPipe(profile);
+   if (p_profile == PIPE_VIDEO_PROFILE_UNKNOWN)	{
+	   ret = VDP_STATUS_INVALID_DECODER_PROFILE;
+	   goto inv_profile;
+   }
+
+	// TODO: Define max_references. Used mainly for H264
+	
+	vldecoder->chroma_format = p_profile;
+	vldecoder->device = dev;
+		
+	*decoder = vlAddDataHTAB(vldecoder);
+	if (*decoder == 0) {
+      ret = VDP_STATUS_ERROR;
+      goto no_handle;
+	}
+	
+	return VDP_STATUS_OK;
+	
+	no_handle:
+	FREE(vldecoder);
+	inv_profile:
+	no_screen:
+	no_decoder:
+	inv_device:
+    return ret;
+}
+
+VdpStatus
+vlVdpDecoderDestroy  (VdpDecoder decoder
+)
+{
+	vlVdpDecoder *vldecoder;
+	
+	vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
+	if (!vldecoder)  {
+      return VDP_STATUS_INVALID_HANDLE;
+	}
+	
+	if (vldecoder->vctx)
+		vl_video_destroy(vldecoder->vctx);
+		
+	if (vldecoder->vlscreen)
+		vl_screen_destroy(vldecoder->vlscreen);
+		
+	FREE(vldecoder);
+	
+	return VDP_STATUS_OK;
+}
+
+VdpStatus
+vlVdpCreateSurface		   (vlVdpDecoder *vldecoder,
+							vlVdpSurface *vlsurf
+)
+{
+	
+	return VDP_STATUS_OK;
+}
+
+VdpStatus
+vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
+							vlVdpSurface *vlsurf,
+							VdpPictureInfoMPEG1Or2 *picture_info,
+							uint32_t bitstream_buffer_count,
+							VdpBitstreamBuffer const *bitstream_buffers
+							)
+{
+	struct pipe_video_context *vpipe;
+	vlVdpSurface *t_vdp_surf;
+	vlVdpSurface *p_vdp_surf;
+	vlVdpSurface *f_vdp_surf;
+	struct pipe_surface *t_surf;
+	struct pipe_surface *p_surf;
+	struct pipe_surface *f_surf;
+	uint32_t num_macroblocks;
+
+	vpipe = vldecoder->vctx->vpipe;
+	t_vdp_surf = vlsurf;
+    p_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference);
+	if (p_vdp_surf)
+		return VDP_STATUS_INVALID_HANDLE;
+		
+	f_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference);
+	if (f_vdp_surf)
+		return VDP_STATUS_INVALID_HANDLE;
+		
+	/* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
+	if (p_vdp_surf ==  VDP_INVALID_HANDLE) p_vdp_surf = NULL;
+	if (f_vdp_surf ==  VDP_INVALID_HANDLE) f_vdp_surf = NULL;
+	
+	vlVdpCreateSurface(vldecoder,t_vdp_surf);
+		
+	num_macroblocks = picture_info->slice_count;
+	struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
+	
+	/*VdpMacroBlocksToPipe(vpipe->screen, macroblocks, blocks, first_macroblock,
+                     num_macroblocks, pipe_macroblocks);*/
+		
+	vpipe->set_decode_target(vpipe,t_surf);
+	/*vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks,
+		&pipe_macroblocks->base, &target_surface_priv->render_fence);*/
+}
+
+VdpStatus
+vlVdpDecoderRender (VdpDecoder decoder, 
+					VdpVideoSurface target, 
+					VdpPictureInfo const *picture_info, 
+					uint32_t bitstream_buffer_count, 
+					VdpBitstreamBuffer const *bitstream_buffers
+)
+{
+	vlVdpDecoder *vldecoder;
+	vlVdpSurface *vlsurf;
+	VdpStatus ret;
+		
+	if (!(picture_info && bitstream_buffers))
+		return VDP_STATUS_INVALID_POINTER;
+	
+	
+	vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
+	if (!vldecoder)
+		return VDP_STATUS_INVALID_HANDLE;
+
+	vlsurf = (vlVdpSurface *)vlGetDataHTAB(target);
+	if (!vlsurf)
+		return VDP_STATUS_INVALID_HANDLE;
+	
+	if (vlsurf->device != vldecoder->device)
+		return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
+		
+	if (vlsurf->chroma_format != vldecoder->chroma_format)
+		return VDP_STATUS_INVALID_CHROMA_TYPE;
+		
+    // TODO: Right now only mpeg2 is supported.
+	switch (vldecoder->vctx->vpipe->profile)   {
+		case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
+		case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+			ret = vlVdpDecoderRenderMpeg2(vldecoder,vlsurf,(VdpPictureInfoMPEG1Or2 *)picture_info,
+											bitstream_buffer_count,bitstream_buffers);
+			break;
+		default:
+			return VDP_STATUS_INVALID_DECODER_PROFILE;
+	}
+	assert(0);
+
+	return ret;
+}
\ No newline at end of file
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index ba91e16a43f..111b15c619f 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -48,7 +48,7 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGe
       goto no_htab;
    }
 
-   dev = CALLOC(0, sizeof(vlVdpDevice));
+   dev = CALLOC(1, sizeof(vlVdpDevice));
    if (!dev) {
       ret = VDP_STATUS_RESOURCES;
       goto no_dev;
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
index 71793cc8ad5..eb7cfbcdd36 100644
--- a/src/gallium/state_trackers/vdpau/query.c
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -29,6 +29,7 @@
 #include <vl_winsys.h>
 #include <assert.h>
 #include <pipe/p_screen.h>
+#include <pipe/p_defines.h>
 #include <math.h>
 
 
@@ -56,6 +57,7 @@ VdpStatus
 vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chroma_type,
                                    VdpBool *is_supported, uint32_t *max_width, uint32_t *max_height)
 {
+   struct vl_screen *vlscreen;
    uint32_t max_2d_texture_level;
    VdpStatus ret;
 
@@ -66,9 +68,8 @@ vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chrom
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
    
-   if (!dev->vlscreen)
-   dev->vlscreen = vl_screen_create(dev->display, dev->screen);
-   if (!dev->vlscreen)
+   vlscreen = vl_screen_create(dev->display, dev->screen);
+   if (!vlscreen)
       return VDP_STATUS_RESOURCES;
 
    /* XXX: Current limits */ 
@@ -78,7 +79,7 @@ vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chrom
 	  goto no_sup;
    }
 
-   max_2d_texture_level = dev->vlscreen->pscreen->get_param( dev->vlscreen->pscreen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS );
+   max_2d_texture_level = vlscreen->pscreen->get_param( vlscreen->pscreen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS );
    if (!max_2d_texture_level)  {
       ret = VDP_STATUS_RESOURCES;
 	  goto no_sup;
@@ -87,6 +88,8 @@ vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chrom
    /* I am not quite sure if it is max_2d_texture_level-1 or just max_2d_texture_level */
    *max_width = *max_height = pow(2,max_2d_texture_level-1);
    
+   vl_screen_destroy(vlscreen);
+   
    return VDP_STATUS_OK;
    no_sup:
    return ret;
@@ -97,6 +100,8 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
                                                   VdpYCbCrFormat bits_ycbcr_format,
                                                   VdpBool *is_supported)
 {
+	struct vl_screen *vlscreen;
+	
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
@@ -104,18 +109,19 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
 
-   if (!dev->vlscreen)
-   dev->vlscreen = vl_screen_create(dev->display, dev->screen);
-   if (!dev->vlscreen)
+   vlscreen = vl_screen_create(dev->display, dev->screen);
+   if (!vlscreen)
       return VDP_STATUS_RESOURCES;
 
    if (bits_ycbcr_format != VDP_YCBCR_FORMAT_Y8U8V8A8) 
-	                               *is_supported = dev->vlscreen->pscreen->is_format_supported(dev->vlscreen->pscreen,
+	                               *is_supported = vlscreen->pscreen->is_format_supported(vlscreen->pscreen,
                                    FormatToPipe(bits_ycbcr_format),
                                    PIPE_TEXTURE_2D,
                                    PIPE_BIND_RENDER_TARGET, 
                                    PIPE_TEXTURE_GEOM_NON_SQUARE );
 								   
+   vl_screen_destroy(vlscreen);
+								   
    return VDP_STATUS_OK;
 }
 
@@ -124,10 +130,49 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
                               VdpBool *is_supported, uint32_t *max_level, uint32_t *max_macroblocks,
                               uint32_t *max_width, uint32_t *max_height)
 {
+   enum pipe_video_profile p_profile;
+   uint32_t max_decode_width;
+   uint32_t max_decode_height;
+   uint32_t max_2d_texture_level;
+   struct vl_screen *vlscreen;
+	
    if (!(is_supported && max_level && max_macroblocks && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
+	  
+   vlVdpDevice *dev = vlGetDataHTAB(device);
+   if (!dev)
+      return VDP_STATUS_INVALID_HANDLE;
+   
+   vlscreen = vl_screen_create(dev->display, dev->screen);
+   if (!vlscreen)
+      return VDP_STATUS_RESOURCES;
 
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   p_profile = ProfileToPipe(profile);
+   if (p_profile == PIPE_VIDEO_PROFILE_UNKNOWN)	{
+	   *is_supported = false;
+	   return VDP_STATUS_OK;
+   }
+   
+   if (p_profile != PIPE_VIDEO_PROFILE_MPEG2_SIMPLE && p_profile != PIPE_VIDEO_PROFILE_MPEG2_MAIN)  {
+	   *is_supported = false;
+	   return VDP_STATUS_OK;
+   }
+	   
+   /* XXX hack, need to implement something more sane when the decoders have been implemented */
+   max_2d_texture_level = vlscreen->pscreen->get_param( vlscreen->pscreen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS );
+   max_decode_width = max_decode_height = pow(2,max_2d_texture_level-2);
+   if (!(max_decode_width && max_decode_height))  
+      return VDP_STATUS_RESOURCES;
+	
+   *is_supported = true;
+   *max_width = max_decode_width;
+   *max_height = max_decode_height;
+   *max_level = 16;
+   *max_macroblocks = (max_decode_width/16) * (max_decode_height/16);
+   
+   vl_screen_destroy(vlscreen);
+
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index 2de2ee222c1..18fe788f870 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -29,6 +29,7 @@
 #include <pipe/p_screen.h>
 #include <pipe/p_state.h>
 #include <util/u_memory.h>
+#include <util/u_format.h>
 
 VdpStatus
 vlVdpVideoSurfaceCreate(VdpDevice device,
@@ -52,37 +53,20 @@ vlVdpVideoSurfaceCreate(VdpDevice device,
        goto no_htab;
     }
 
-   p_surf = CALLOC(0, sizeof(p_surf));
+   p_surf = CALLOC(1, sizeof(p_surf));
    if (!p_surf) {
       ret = VDP_STATUS_RESOURCES;
       goto no_res;
    }
 
-   p_surf->psurface = CALLOC(0,sizeof(struct pipe_surface));
-   if (!p_surf->psurface)  {
-	   ret = VDP_STATUS_RESOURCES;
-	   goto no_surf;
-   }
-
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)  {
       ret = VDP_STATUS_INVALID_HANDLE;
       goto inv_device;
    }
 
-   if (!dev->vlscreen)
-   dev->vlscreen = vl_screen_create(dev->display, dev->screen);
-   if (!dev->vlscreen)   {
-      ret = VDP_STATUS_RESOURCES;
-      goto inv_device;
-   }
-
-   p_surf->psurface->height = height;
-   p_surf->psurface->width = width;
-   p_surf->psurface->level = 0;
-   p_surf->psurface->usage = PIPE_USAGE_DEFAULT;
    p_surf->chroma_format = FormatToPipe(chroma_type);
-   p_surf->vlscreen = dev->vlscreen;
+   p_surf->device = dev;
     
    *surface = vlAddDataHTAB(p_surf);
    if (*surface == 0) {
@@ -113,9 +97,12 @@ vlVdpVideoSurfaceDestroy  ( VdpVideoSurface surface )
    if (!p_surf)
        return VDP_STATUS_INVALID_HANDLE;
 
-   if (p_surf->psurface)
-	   p_surf->vlscreen->pscreen->tex_surface_destroy(p_surf->psurface);
-	   
+   if (p_surf->psurface)  {
+	   if (p_surf->psurface->texture)  {
+		   if (p_surf->psurface->texture->screen)
+				p_surf->psurface->texture->screen->tex_surface_destroy(p_surf->psurface);
+	   }
+   }
    FREE(p_surf);
    return VDP_STATUS_OK;
 }
@@ -130,21 +117,17 @@ vlVdpVideoSurfaceGetParameters ( VdpVideoSurface surface,
    if (!(width && height && chroma_type))  
       return VDP_STATUS_INVALID_POINTER; 
    
-
-   if (!vlCreateHTAB()) 
-      return VDP_STATUS_RESOURCES;
-
-
+   
    vlVdpSurface *p_surf = vlGetDataHTAB(surface);
    if (!p_surf) 
       return VDP_STATUS_INVALID_HANDLE;
 
 
-   if (!(p_surf->psurface && p_surf->chroma_format))  
-      return VDP_STATUS_INVALID_HANDLE;
+   if (!(p_surf->chroma_format > 0 && p_surf->chroma_format < 3))  
+      return VDP_STATUS_INVALID_CHROMA_TYPE;
 
-   *width = p_surf->psurface->width;
-   *height = p_surf->psurface->height;
+   *width = p_surf->width;
+   *height = p_surf->height;
    *chroma_type = PipeToType(p_surf->chroma_format);
 
    return VDP_STATUS_OK;
@@ -157,6 +140,50 @@ vlVdpVideoSurfaceGetBitsYCbCr ( VdpVideoSurface surface,
 				uint32_t const *destination_pitches
 )
 {
+    if (!vlCreateHTAB()) 
+      return VDP_STATUS_RESOURCES;
+
+
+    vlVdpSurface *p_surf = vlGetDataHTAB(surface);
+    if (!p_surf) 
+       return VDP_STATUS_INVALID_HANDLE;
+	  
+	if (!p_surf->psurface)
+		return VDP_STATUS_RESOURCES;
+   
    
+	return VDP_STATUS_OK;
+}
+
+VdpStatus
+vlVdpVideoSurfacePutBitsYCbCr ( VdpVideoSurface surface, 
+								VdpYCbCrFormat source_ycbcr_format, 
+								void const *const *source_data, 
+								uint32_t const *source_pitches
+)
+{
+	uint32_t size_surface_bytes;
+	const struct util_format_description *format_desc;
+	enum pipe_format pformat = FormatToPipe(source_ycbcr_format);
+	
+	if (!vlCreateHTAB()) 
+      return VDP_STATUS_RESOURCES;
+
+
+    vlVdpSurface *p_surf = vlGetDataHTAB(surface);
+    if (!p_surf) 
+       return VDP_STATUS_INVALID_HANDLE;
+	   
+	
+	//size_surface_bytes =  ( source_pitches[0] * p_surf->height util_format_get_blockheight(pformat) );   
+	/*util_format_translate(enum pipe_format dst_format,
+                      void *dst, unsigned dst_stride,
+                      unsigned dst_x, unsigned dst_y,
+                      enum pipe_format src_format,
+                      const void *src, unsigned src_stride,
+                      unsigned src_x, unsigned src_y,
+                      unsigned width, unsigned height);*/
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
 
 }
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 27793892185..566c99266ed 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -31,6 +31,7 @@
 
 #include <vdpau/vdpau.h>
 #include <pipe/p_compiler.h>
+#include <pipe/p_video_context.h>
 #include <vl_winsys.h>
 #include <assert.h>
 
@@ -116,21 +117,54 @@ static VdpYCbCrFormat PipeToFormat(enum pipe_format p_format)
    return -1;
 }
 
+static enum pipe_video_profile ProfileToPipe(VdpDecoderProfile vdpau_profile)
+{
+   switch (vdpau_profile) {
+      case VDP_DECODER_PROFILE_MPEG1:
+         return PIPE_VIDEO_PROFILE_MPEG1;
+      case VDP_DECODER_PROFILE_MPEG2_SIMPLE:
+         return PIPE_VIDEO_PROFILE_MPEG2_SIMPLE;
+      case VDP_DECODER_PROFILE_MPEG2_MAIN:
+         return PIPE_VIDEO_PROFILE_MPEG2_MAIN;
+      case VDP_DECODER_PROFILE_H264_BASELINE:
+         return PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE;
+      case VDP_DECODER_PROFILE_H264_MAIN: /* Not defined in p_format.h */
+         return PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN;
+      case VDP_DECODER_PROFILE_H264_HIGH:
+	     return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH;
+      default:
+         PIPE_VIDEO_PROFILE_UNKNOWN;
+   }
+
+   return -1;
+}
+
 typedef struct
 {
    void *display;
    int screen;
-   struct vl_screen *vlscreen;
-   struct vl_context *vctx;
 } vlVdpDevice;
 
 typedef struct
 {
-   struct vl_screen *vlscreen;
+   vlVdpDevice *device;
+   uint32_t width;
+   uint32_t height;
+   uint32_t pitch;
    struct pipe_surface *psurface;
-   enum pipe_video_chroma_format chroma_format; 
+   enum pipe_format format;
+   enum pipe_video_chroma_format chroma_format;
+   uint8_t *data;
 } vlVdpSurface;
 
+typedef struct
+{
+	vlVdpDevice *device;
+	struct vl_screen *vlscreen;
+    struct vl_context *vctx;
+	enum pipe_video_chroma_format chroma_format;
+} vlVdpDecoder;
+
 typedef uint32_t vlHandle;
 
 boolean vlCreateHTAB(void);
@@ -160,5 +194,8 @@ VdpVideoSurfaceDestroy vlVdpVideoSurfaceDestroy;
 VdpVideoSurfaceGetParameters vlVdpVideoSurfaceGetParameters;
 VdpVideoSurfaceGetBitsYCbCr vlVdpVideoSurfaceGetBitsYCbCr;
 VdpVideoSurfacePutBitsYCbCr vlVdpVideoSurfacePutBitsYCbCr;
+VdpDecoderCreate vlVdpDecoderCreate;
+VdpDecoderDestroy vlVdpDecoderDestroy;
+VdpDecoderRender vlVdpDecoderRender;
 
 #endif // VDPAU_PRIVATE_H
\ No newline at end of file
-- 
cgit v1.2.3


From 6ada38d29a9b6eb01ad21e9b1ec089bf42d497da Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@io.dk>
Date: Thu, 22 Jul 2010 01:46:40 +0200
Subject: Added stubs for the rest of the vdpau interface

---
 src/gallium/state_trackers/vdpau/bitmap.c       |  75 ++++++++++++++
 src/gallium/state_trackers/vdpau/color.c        |   0
 src/gallium/state_trackers/vdpau/decode.c       | 125 +++++++++++++++++++-----
 src/gallium/state_trackers/vdpau/device.c       |  59 ++++++++++-
 src/gallium/state_trackers/vdpau/ftab.c         |  46 ++++-----
 src/gallium/state_trackers/vdpau/mixer.c        |   0
 src/gallium/state_trackers/vdpau/output.c       |  43 ++++++++
 src/gallium/state_trackers/vdpau/preemption.c   |   0
 src/gallium/state_trackers/vdpau/presentation.c | 121 +++++++++++++++++++++++
 src/gallium/state_trackers/vdpau/query.c        |  18 +++-
 src/gallium/state_trackers/vdpau/render.c       |   0
 src/gallium/state_trackers/vdpau/surface.c      |   3 +
 12 files changed, 436 insertions(+), 54 deletions(-)
 create mode 100644 src/gallium/state_trackers/vdpau/bitmap.c
 create mode 100644 src/gallium/state_trackers/vdpau/color.c
 create mode 100644 src/gallium/state_trackers/vdpau/mixer.c
 create mode 100644 src/gallium/state_trackers/vdpau/output.c
 create mode 100644 src/gallium/state_trackers/vdpau/preemption.c
 create mode 100644 src/gallium/state_trackers/vdpau/presentation.c
 create mode 100644 src/gallium/state_trackers/vdpau/render.c

diff --git a/src/gallium/state_trackers/vdpau/bitmap.c b/src/gallium/state_trackers/vdpau/bitmap.c
new file mode 100644
index 00000000000..f1a9d9a6828
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/bitmap.c
@@ -0,0 +1,75 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <vdpau/vdpau.h>
+#include "vdpau_private.h"
+#include <util/u_debug.h>
+
+VdpStatus
+vlVdpBitmapSurfaceCreate(	VdpDevice device, 
+							VdpRGBAFormat rgba_format, 
+							uint32_t width, uint32_t height, 
+							VdpBool frequently_accessed, 
+							VdpBitmapSurface *surface)
+{
+	debug_printf("[VDPAU] Creating a bitmap surface\n");
+	if (!surface)
+		return VDP_STATUS_INVALID_POINTER;
+
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpBitmapSurfaceDestroy ( VdpBitmapSurface  surface )
+{
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpBitmapSurfaceGetParameters ( 	VdpBitmapSurface  surface,
+									VdpRGBAFormat  *rgba_format, 
+									uint32_t *width, uint32_t *height, 
+									VdpBool  *frequently_accessed)
+{
+	if (!(rgba_format && width && height && frequently_accessed))
+		return VDP_STATUS_INVALID_POINTER;
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpBitmapSurfacePutBitsNative ( 	VdpBitmapSurface  surface, 
+									void const *const *source_data, 
+									uint32_t const *source_pitches, 
+									VdpRect  const *destination_rect )
+{
+	if (!(source_data && source_pitches && destination_rect))
+		return VDP_STATUS_INVALID_POINTER;
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
\ No newline at end of file
diff --git a/src/gallium/state_trackers/vdpau/color.c b/src/gallium/state_trackers/vdpau/color.c
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 8daf7a47f97..ec3995b98db 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -27,7 +27,9 @@
 
 #include "vdpau_private.h"
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <pipe/p_video_context.h>
+#include <util/u_debug.h>
 
 VdpStatus
 vlVdpDecoderCreate ( 	VdpDevice device, 
@@ -37,10 +39,13 @@ vlVdpDecoderCreate ( 	VdpDevice device,
 						VdpDecoder *decoder 
 )
 {
+	struct vl_screen *vscreen;
 	enum pipe_video_profile p_profile;
 	VdpStatus ret;
 	vlVdpDecoder *vldecoder;
 	
+	debug_printf("[VDPAU] Creating decoder\n");
+	
 	if (!decoder)
 		return VDP_STATUS_INVALID_POINTER;
 	
@@ -58,12 +63,6 @@ vlVdpDecoderCreate ( 	VdpDevice device,
 	   ret = VDP_STATUS_RESOURCES;
 	   goto no_decoder;
    }
-	
-   vldecoder->vlscreen = vl_screen_create(dev->display, dev->screen);
-   if (!vldecoder->vlscreen)  
-      ret = VDP_STATUS_RESOURCES;
-	  goto no_screen;
-   
    
    p_profile = ProfileToPipe(profile);
    if (p_profile == PIPE_VIDEO_PROFILE_UNKNOWN)	{
@@ -73,7 +72,7 @@ vlVdpDecoderCreate ( 	VdpDevice device,
 
 	// TODO: Define max_references. Used mainly for H264
 	
-	vldecoder->chroma_format = p_profile;
+	vldecoder->profile = p_profile;
 	vldecoder->device = dev;
 		
 	*decoder = vlAddDataHTAB(vldecoder);
@@ -81,6 +80,7 @@ vlVdpDecoderCreate ( 	VdpDevice device,
       ret = VDP_STATUS_ERROR;
       goto no_handle;
 	}
+	debug_printf("[VDPAU] Decoder created succesfully\n");
 	
 	return VDP_STATUS_OK;
 	
@@ -104,26 +104,76 @@ vlVdpDecoderDestroy  (VdpDecoder decoder
       return VDP_STATUS_INVALID_HANDLE;
 	}
 	
+	if (vldecoder->vctx->vscreen)
+		vl_screen_destroy(vldecoder->vctx->vscreen);
+	
 	if (vldecoder->vctx)
 		vl_video_destroy(vldecoder->vctx);
 		
-	if (vldecoder->vlscreen)
-		vl_screen_destroy(vldecoder->vlscreen);
-		
 	FREE(vldecoder);
 	
 	return VDP_STATUS_OK;
 }
 
 VdpStatus
-vlVdpCreateSurface		   (vlVdpDecoder *vldecoder,
+vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
 							vlVdpSurface *vlsurf
 )
 {
+	struct pipe_resource tmplt;
+	struct pipe_resource *surf_tex;
+	struct pipe_video_context *vpipe;
+		
+	if(!(vldecoder && vlsurf))
+		return VDP_STATUS_INVALID_POINTER;
+		
+	vpipe = vldecoder->vctx;
+		
+	memset(&tmplt, 0, sizeof(struct pipe_resource));
+	tmplt.target = PIPE_TEXTURE_2D;
+	tmplt.format = vlsurf->format;
+	tmplt.last_level = 0;
+	if (vpipe->is_format_supported(vpipe, tmplt.format,
+                                  PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET,
+                                  PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)) {
+      tmplt.width0 = vlsurf->width;
+      tmplt.height0 = vlsurf->height;
+    }
+    else {
+      assert(vpipe->is_format_supported(vpipe, tmplt.format,
+                                       PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET,
+                                       PIPE_TEXTURE_GEOM_NON_SQUARE));
+      tmplt.width0 = util_next_power_of_two(vlsurf->width);
+      tmplt.height0 = util_next_power_of_two(vlsurf->height);
+    }
+	tmplt.depth0 = 1;
+	tmplt.usage = PIPE_USAGE_DEFAULT;
+	tmplt.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+	tmplt.flags = 0;
+	
+	surf_tex = vpipe->screen->resource_create(vpipe->screen, &tmplt);
+	
+	vlsurf->psurface = vpipe->screen->get_tex_surface(vpipe->screen, surf_tex, 0, 0, 0,
+                                         PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+										 
+	pipe_resource_reference(&surf_tex, NULL);
+	
+	if (!vlsurf->psurface)
+		return VDP_STATUS_RESOURCES;
+	
 	
 	return VDP_STATUS_OK;
 }
 
+static void
+vlVdpMacroBlocksToPipe(struct pipe_screen *screen,
+                  VdpBitstreamBuffer const *bitstream_buffers,
+                  unsigned int num_macroblocks,
+                  struct pipe_mpeg12_macroblock *pipe_macroblocks)
+{
+	debug_printf("NAF!\n");
+}
+
 VdpStatus
 vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 							vlVdpSurface *vlsurf,
@@ -140,32 +190,43 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 	struct pipe_surface *p_surf;
 	struct pipe_surface *f_surf;
 	uint32_t num_macroblocks;
+	VdpStatus ret;
+	
 
 	vpipe = vldecoder->vctx->vpipe;
 	t_vdp_surf = vlsurf;
-    p_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference);
-	if (p_vdp_surf)
-		return VDP_STATUS_INVALID_HANDLE;
-		
-	f_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference);
-	if (f_vdp_surf)
-		return VDP_STATUS_INVALID_HANDLE;
-		
+	
 	/* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
-	if (p_vdp_surf ==  VDP_INVALID_HANDLE) p_vdp_surf = NULL;
+	if (picture_info->backward_reference ==  VDP_INVALID_HANDLE) 
+		p_vdp_surf = NULL;
+	else	{
+		p_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference);
+		if (!p_vdp_surf)
+			return VDP_STATUS_INVALID_HANDLE;
+	}
+
+	if (picture_info->forward_reference ==  VDP_INVALID_HANDLE) 
+		f_vdp_surf = NULL;
+	else	{
+		f_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference);
+		if (!f_vdp_surf)
+			return VDP_STATUS_INVALID_HANDLE;
+	}
+		
+	
 	if (f_vdp_surf ==  VDP_INVALID_HANDLE) f_vdp_surf = NULL;
 	
-	vlVdpCreateSurface(vldecoder,t_vdp_surf);
+	ret = vlVdpCreateSurfaceTarget(vldecoder,t_vdp_surf);
 		
-	num_macroblocks = picture_info->slice_count;
+	num_macroblocks = bitstream_buffer_count;
 	struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
 	
-	/*VdpMacroBlocksToPipe(vpipe->screen, macroblocks, blocks, first_macroblock,
-                     num_macroblocks, pipe_macroblocks);*/
+	vlVdpMacroBlocksToPipe(vpipe->screen, bitstream_buffers,
+                     num_macroblocks, pipe_macroblocks);
 		
 	vpipe->set_decode_target(vpipe,t_surf);
-	/*vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks,
-		&pipe_macroblocks->base, &target_surface_priv->render_fence);*/
+	vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, &pipe_macroblocks->base, NULL);
+	return ret;
 }
 
 VdpStatus
@@ -178,7 +239,9 @@ vlVdpDecoderRender (VdpDecoder decoder,
 {
 	vlVdpDecoder *vldecoder;
 	vlVdpSurface *vlsurf;
+	struct vl_screen *vscreen;
 	VdpStatus ret;
+	debug_printf("[VDPAU] Decoding\n");
 		
 	if (!(picture_info && bitstream_buffers))
 		return VDP_STATUS_INVALID_POINTER;
@@ -198,6 +261,16 @@ vlVdpDecoderRender (VdpDecoder decoder,
 	if (vlsurf->chroma_format != vldecoder->chroma_format)
 		return VDP_STATUS_INVALID_CHROMA_TYPE;
 		
+	vscreen = vl_screen_create(vldecoder->device->display, vldecoder->device->screen);
+	if (!vscreen)
+		return VDP_STATUS_RESOURCES;
+	
+	vldecoder->vctx = vl_video_create(vscreen, vldecoder->profile, vlsurf->format, vlsurf->width, vlsurf->height);
+	if (!vldecoder->vctx)
+		return VDP_STATUS_RESOURCES;
+		
+	vldecoder->vctx->vscreen = vscreen;
+		
     // TODO: Right now only mpeg2 is supported.
 	switch (vldecoder->vctx->vpipe->profile)   {
 		case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 111b15c619f..d370d1c6610 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -29,6 +29,7 @@
 #include <pipe/p_compiler.h>
 #include <vl_winsys.h>
 #include <util/u_memory.h>
+#include <util/u_debug.h>
 #include "vdpau_private.h"
 
 VdpDeviceCreateX11 vdp_imp_device_create_x11;
@@ -56,7 +57,6 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGe
    dev->display = display;
    dev->screen = screen;
 
-
    *device = vlAddDataHTAB(dev);
    if (*device == 0) {
       ret = VDP_STATUS_ERROR;
@@ -64,6 +64,8 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGe
    }
 
    *get_proc_address = &vlVdpGetProcAddress;
+   
+   debug_printf("[VDPAU] Device created succesfully\n");
 
    return VDP_STATUS_OK;
 
@@ -75,7 +77,8 @@ no_htab:
    return ret;
 }
 
-VdpStatus vlVdpDeviceDestroy(VdpDevice device)
+VdpStatus 
+vlVdpDeviceDestroy(VdpDevice device)
 {
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
@@ -83,10 +86,13 @@ VdpStatus vlVdpDeviceDestroy(VdpDevice device)
    FREE(dev);
    vlDestroyHTAB();
 
+   debug_printf("[VDPAU] Device destroyed succesfully\n");
+
    return VDP_STATUS_OK;
 }
 
-VdpStatus vlVdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_pointer)
+VdpStatus 
+vlVdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_pointer)
 {
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
@@ -100,3 +106,62 @@ VdpStatus vlVdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **fu
 
    return VDP_STATUS_OK;
 }
+
+/* Expands to one switch case that returns the description for TYPE. */
+#define VL_VDP_ERROR_CASE(TYPE,STRING) \
+	case TYPE:	\
+		return STRING
+
+/**
+ * Return a human-readable description of a VdpStatus value.
+ *
+ * Every result is a string literal, so the caller must neither modify nor
+ * free it.  A status without a case below gets a generic message instead of
+ * letting control run off the end of a non-void function.
+ */
+char const *
+vlVdpGetErrorString (
+VdpStatus status)
+{
+	switch (status)
+	{
+		VL_VDP_ERROR_CASE(VDP_STATUS_OK,"The operation completed successfully; no error.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_NO_IMPLEMENTATION,"No backend implementation could be loaded.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_DISPLAY_PREEMPTED,"The display was preempted, or a fatal error occurred. The application must re-initialize VDPAU.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_HANDLE,"An invalid handle value was provided. Either the handle does not exist at all, or refers to an object of an incorrect type.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_POINTER ,"An invalid pointer was provided. Typically, this means that a NULL pointer was provided for an 'output' parameter.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_CHROMA_TYPE ,"An invalid/unsupported VdpChromaType value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_Y_CB_CR_FORMAT,"An invalid/unsupported VdpYCbCrFormat value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_RGBA_FORMAT,"An invalid/unsupported VdpRGBAFormat value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_INDEXED_FORMAT,"An invalid/unsupported VdpIndexedFormat value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_COLOR_STANDARD,"An invalid/unsupported VdpColorStandard value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_COLOR_TABLE_FORMAT,"An invalid/unsupported VdpColorTableFormat value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_BLEND_FACTOR,"An invalid/unsupported VdpOutputSurfaceRenderBlendFactor value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_BLEND_EQUATION,"An invalid/unsupported VdpOutputSurfaceRenderBlendEquation value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_FLAG,"An invalid/unsupported flag value/combination was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_DECODER_PROFILE,"An invalid/unsupported VdpDecoderProfile value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_VIDEO_MIXER_FEATURE,"An invalid/unsupported VdpVideoMixerFeature value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_VIDEO_MIXER_PARAMETER ,"An invalid/unsupported VdpVideoMixerParameter value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_VIDEO_MIXER_ATTRIBUTE,"An invalid/unsupported VdpVideoMixerAttribute value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_VIDEO_MIXER_PICTURE_STRUCTURE,"An invalid/unsupported VdpVideoMixerPictureStructure value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_FUNC_ID,"An invalid/unsupported VdpFuncId value was supplied.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_SIZE,"The size of a supplied object does not match the object it is being used with. "
+							"For example, a VdpVideoMixer is configured to process VdpVideoSurface objects of a specific size. "
+							"If presented with a VdpVideoSurface of a different size, this error will be raised.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_VALUE,"An invalid/unsupported value was supplied. "
+							"This is a catch-all error code for values of type other than those with a specific error code.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_INVALID_STRUCT_VERSION,"An invalid/unsupported structure version was specified in a versioned structure. "
+							"This implies that the implementation is older than the header file the application was built against.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_RESOURCES,"The system does not have enough resources to complete the requested operation at this time.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_HANDLE_DEVICE_MISMATCH,"The set of handles supplied are not all related to the same VdpDevice. When performing operations "
+							"that operate on multiple surfaces, such as VdpOutputSurfaceRenderOutputSurface or VdpVideoMixerRender, "
+							"all supplied surfaces must have been created within the context of the same VdpDevice object. "
+							"This error is raised if they were not.");
+		VL_VDP_ERROR_CASE(VDP_STATUS_ERROR,"A catch-all error, used when no other error code applies.");
+	}
+
+	/* Reached only for a status value that has no case above. */
+	return "Unknown VdpStatus value.";
+}
+
+#undef VL_VDP_ERROR_CASE
diff --git a/src/gallium/state_trackers/vdpau/ftab.c b/src/gallium/state_trackers/vdpau/ftab.c
index 7e476e5ee28..1842c4da0ea 100644
--- a/src/gallium/state_trackers/vdpau/ftab.c
+++ b/src/gallium/state_trackers/vdpau/ftab.c
@@ -30,8 +30,8 @@
 
 static void* ftab[67] =
 {
-   0, /* VDP_FUNC_ID_GET_ERROR_STRING */
-   0, /* VDP_FUNC_ID_GET_PROC_ADDRESS */
+   &vlVdpGetErrorString, /* VDP_FUNC_ID_GET_ERROR_STRING */
+   &vlVdpGetProcAddress, /* VDP_FUNC_ID_GET_PROC_ADDRESS */
    &vlVdpGetApiVersion, /* VDP_FUNC_ID_GET_API_VERSION */
    0,
    &vlVdpGetInformationString, /* VDP_FUNC_ID_GET_INFORMATION_STRING */
@@ -40,15 +40,15 @@ static void* ftab[67] =
    &vlVdpVideoSurfaceQueryCapabilities, /* VDP_FUNC_ID_VIDEO_SURFACE_QUERY_CAPABILITIES */
    &vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities, /* VDP_FUNC_ID_VIDEO_SURFACE_QUERY_GET_PUT_BITS_Y_CB_CR_CAPABILITIES */
    &vlVdpVideoSurfaceCreate, /* VDP_FUNC_ID_VIDEO_SURFACE_CREATE */
-   0, /* VDP_FUNC_ID_VIDEO_SURFACE_DESTROY */
-   0, /* VDP_FUNC_ID_VIDEO_SURFACE_GET_PARAMETERS */
-   0, /* VDP_FUNC_ID_VIDEO_SURFACE_GET_BITS_Y_CB_CR */
-   0, /* VDP_FUNC_ID_VIDEO_SURFACE_PUT_BITS_Y_CB_CR */
+   &vlVdpVideoSurfaceDestroy, /* VDP_FUNC_ID_VIDEO_SURFACE_DESTROY */
+   &vlVdpVideoSurfaceGetParameters, /* VDP_FUNC_ID_VIDEO_SURFACE_GET_PARAMETERS */
+   &vlVdpVideoSurfaceGetBitsYCbCr, /* VDP_FUNC_ID_VIDEO_SURFACE_GET_BITS_Y_CB_CR */
+   &vlVdpVideoSurfacePutBitsYCbCr, /* VDP_FUNC_ID_VIDEO_SURFACE_PUT_BITS_Y_CB_CR */
    &vlVdpOutputSurfaceQueryCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_CAPABILITIES */
    &vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_GET_PUT_BITS_NATIVE_CAPABILITIES */
    0, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_PUT_BITS_INDEXED_CAPABILITIES */
    &vlVdpOutputSurfaceQueryPutBitsYCbCrCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_PUT_BITS_Y_CB_CR_CAPABILITIES */
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_CREATE */
+   &vlVdpOutputSurfaceCreate, /* VDP_FUNC_ID_OUTPUT_SURFACE_CREATE */
    0, /* VDP_FUNC_ID_OUTPUT_SURFACE_DESTROY */
    0, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_PARAMETERS */
    0, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_BITS_NATIVE */
@@ -56,10 +56,10 @@ static void* ftab[67] =
    0, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_INDEXED */
    0, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_Y_CB_CR */
    &vlVdpBitmapSurfaceQueryCapabilities, /* VDP_FUNC_ID_BITMAP_SURFACE_QUERY_CAPABILITIES */
-   0, /* VDP_FUNC_ID_BITMAP_SURFACE_CREATE */
-   0, /* VDP_FUNC_ID_BITMAP_SURFACE_DESTROY */
-   0, /* VDP_FUNC_ID_BITMAP_SURFACE_GET_PARAMETERS */
-   0, /* VDP_FUNC_ID_BITMAP_SURFACE_PUT_BITS_NATIVE */
+   &vlVdpBitmapSurfaceCreate, /* VDP_FUNC_ID_BITMAP_SURFACE_CREATE */
+   &vlVdpBitmapSurfaceDestroy, /* VDP_FUNC_ID_BITMAP_SURFACE_DESTROY */
+   &vlVdpBitmapSurfaceGetParameters, /* VDP_FUNC_ID_BITMAP_SURFACE_GET_PARAMETERS */
+   &vlVdpBitmapSurfacePutBitsNative, /* VDP_FUNC_ID_BITMAP_SURFACE_PUT_BITS_NATIVE */
    0,
    0,
    0,
@@ -67,10 +67,10 @@ static void* ftab[67] =
    0, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_BITMAP_SURFACE */
    0, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_VIDEO_SURFACE_LUMA */
    &vlVdpDecoderQueryCapabilities, /* VDP_FUNC_ID_DECODER_QUERY_CAPABILITIES */
-   0, /* VDP_FUNC_ID_DECODER_CREATE */
-   0, /* VDP_FUNC_ID_DECODER_DESTROY */
+   &vlVdpDecoderCreate, /* VDP_FUNC_ID_DECODER_CREATE */
+   &vlVdpDecoderDestroy, /* VDP_FUNC_ID_DECODER_DESTROY */
    0, /* VDP_FUNC_ID_DECODER_GET_PARAMETERS */
-   0, /* VDP_FUNC_ID_DECODER_RENDER */
+   &vlVdpDecoderRender, /* VDP_FUNC_ID_DECODER_RENDER */
    &vlVdpVideoMixerQueryFeatureSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_FEATURE_SUPPORT */
    &vlVdpVideoMixerQueryParameterSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_PARAMETER_SUPPORT */
    &vlVdpVideoMixerQueryAttributeSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_ATTRIBUTE_SUPPORT */
@@ -85,17 +85,17 @@ static void* ftab[67] =
    0, /* VDP_FUNC_ID_VIDEO_MIXER_GET_ATTRIBUTE_VALUES */
    0, /* VDP_FUNC_ID_VIDEO_MIXER_DESTROY */
    0, /* VDP_FUNC_ID_VIDEO_MIXER_RENDER */
-   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_DESTROY */
-   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_CREATE */
-   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_DESTROY */
-   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_SET_BACKGROUND_COLOR */
-   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_GET_BACKGROUND_COLOR */
+   &vlVdpPresentationQueueTargetDestroy, /* VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_DESTROY */
+   &vlVdpPresentationQueueCreate, /* VDP_FUNC_ID_PRESENTATION_QUEUE_CREATE */
+   &vlVdpPresentationQueueDestroy, /* VDP_FUNC_ID_PRESENTATION_QUEUE_DESTROY */
+   &vlVdpPresentationQueueSetBackgroundColor, /* VDP_FUNC_ID_PRESENTATION_QUEUE_SET_BACKGROUND_COLOR */
+   &vlVdpPresentationQueueGetBackgroundColor, /* VDP_FUNC_ID_PRESENTATION_QUEUE_GET_BACKGROUND_COLOR */
    0,
    0,
-   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_GET_TIME */
-   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_DISPLAY */
-   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_BLOCK_UNTIL_SURFACE_IDLE */
-   0, /* VDP_FUNC_ID_PRESENTATION_QUEUE_QUERY_SURFACE_STATUS */
+   &vlVdpPresentationQueueGetTime, /* VDP_FUNC_ID_PRESENTATION_QUEUE_GET_TIME */
+   &vlVdpPresentationQueueDisplay, /* VDP_FUNC_ID_PRESENTATION_QUEUE_DISPLAY */
+   &vlVdpPresentationQueueBlockUntilSurfaceIdle, /* VDP_FUNC_ID_PRESENTATION_QUEUE_BLOCK_UNTIL_SURFACE_IDLE */
+   &vlVdpPresentationQueueQuerySurfaceStatus, /* VDP_FUNC_ID_PRESENTATION_QUEUE_QUERY_SURFACE_STATUS */
    0  /* VDP_FUNC_ID_PREEMPTION_CALLBACK_REGISTER */
 };
 
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
new file mode 100644
index 00000000000..c5f06896c58
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vdpau_private.h"
+#include <vdpau/vdpau.h>
+#include <util/u_debug.h>
+
+/**
+ * Stub for creating an output surface.
+ *
+ * Logs the call and validates its arguments; nothing is ever written
+ * through surface.
+ *
+ * Returns VDP_STATUS_INVALID_SIZE when width or height is zero,
+ * VDP_STATUS_INVALID_POINTER when surface is NULL, and
+ * VDP_STATUS_NO_IMPLEMENTATION otherwise.
+ */
+VdpStatus
+vlVdpOutputSurfaceCreate (	VdpDevice  device,
+							VdpRGBAFormat  rgba_format,
+							uint32_t width, uint32_t height,
+							VdpOutputSurface  *surface)
+{
+	debug_printf("[VDPAU] Creating output surface\n");
+	if (!(width && height))
+		return VDP_STATUS_INVALID_SIZE;
+
+	/* Reject a missing output pointer before reporting anything else. */
+	if (!surface)
+		return VDP_STATUS_INVALID_POINTER;
+
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
diff --git a/src/gallium/state_trackers/vdpau/preemption.c b/src/gallium/state_trackers/vdpau/preemption.c
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
new file mode 100644
index 00000000000..8200cf04326
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -0,0 +1,148 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vdpau_private.h"
+#include <vdpau/vdpau.h>
+#include <util/u_debug.h>
+
+/** Stub: ignores its argument and returns VDP_STATUS_NO_IMPLEMENTATION. */
+VdpStatus
+vlVdpPresentationQueueTargetDestroy (VdpPresentationQueueTarget  presentation_queue_target)
+{
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+/**
+ * Stub: logs the call, returns VDP_STATUS_INVALID_POINTER when
+ * presentation_queue is NULL and VDP_STATUS_NO_IMPLEMENTATION otherwise.
+ */
+VdpStatus
+vlVdpPresentationQueueCreate (	VdpDevice  device, 
+								VdpPresentationQueueTarget  presentation_queue_target, 
+								VdpPresentationQueue  *presentation_queue)
+{
+	debug_printf("[VDPAU] Creating presentation queue\n");
+	
+	if (!presentation_queue)
+		return VDP_STATUS_INVALID_POINTER;
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+/** Stub: ignores its argument and returns VDP_STATUS_NO_IMPLEMENTATION. */
+VdpStatus
+vlVdpPresentationQueueDestroy (VdpPresentationQueue  presentation_queue)
+{
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+/**
+ * Stub: returns VDP_STATUS_INVALID_POINTER when background_color is NULL
+ * and VDP_STATUS_NO_IMPLEMENTATION otherwise; the colour is never read.
+ */
+VdpStatus
+vlVdpPresentationQueueSetBackgroundColor (	VdpPresentationQueue  presentation_queue, 
+											VdpColor  *const background_color)
+{
+	if (!background_color)
+		return VDP_STATUS_INVALID_POINTER;
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+/**
+ * Stub: returns VDP_STATUS_INVALID_POINTER when background_color is NULL
+ * and VDP_STATUS_NO_IMPLEMENTATION otherwise; the colour is never written.
+ */
+VdpStatus
+vlVdpPresentationQueueGetBackgroundColor (	VdpPresentationQueue  presentation_queue, 
+											VdpColor  *const background_color)
+{
+	if (!background_color)
+		return VDP_STATUS_INVALID_POINTER;
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+/**
+ * Stub: returns VDP_STATUS_INVALID_POINTER when current_time is NULL and
+ * VDP_STATUS_NO_IMPLEMENTATION otherwise; *current_time is never written.
+ */
+VdpStatus
+vlVdpPresentationQueueGetTime (	VdpPresentationQueue  presentation_queue, 
+								VdpTime  *current_time)
+{
+	if (!current_time)
+		return VDP_STATUS_INVALID_POINTER;
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+/** Stub: ignores its arguments and returns VDP_STATUS_NO_IMPLEMENTATION. */
+VdpStatus
+vlVdpPresentationQueueDisplay (	VdpPresentationQueue  presentation_queue, 
+								VdpOutputSurface  surface, 
+								uint32_t clip_width, 
+								uint32_t clip_height, 
+								VdpTime  earliest_presentation_time)
+{
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+/**
+ * Stub: returns VDP_STATUS_INVALID_POINTER when first_presentation_time is
+ * NULL and VDP_STATUS_NO_IMPLEMENTATION otherwise; it never blocks.
+ */
+VdpStatus
+vlVdpPresentationQueueBlockUntilSurfaceIdle (	VdpPresentationQueue  presentation_queue, 
+												VdpOutputSurface  surface, 
+												VdpTime  *first_presentation_time)
+{
+	if (!first_presentation_time)
+		return VDP_STATUS_INVALID_POINTER;
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+/**
+ * Stub: returns VDP_STATUS_INVALID_POINTER when status or
+ * first_presentation_time is NULL and VDP_STATUS_NO_IMPLEMENTATION otherwise.
+ */
+VdpStatus
+vlVdpPresentationQueueQuerySurfaceStatus (	VdpPresentationQueue  presentation_queue, 
+											VdpOutputSurface  surface, 
+											VdpPresentationQueueStatus  *status, 
+											VdpTime  *first_presentation_time)
+{
+	if (!(status && first_presentation_time))
+		return VDP_STATUS_INVALID_POINTER;
+	
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
index eb7cfbcdd36..86b5098f178 100644
--- a/src/gallium/state_trackers/vdpau/query.c
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -31,6 +31,7 @@
 #include <pipe/p_screen.h>
 #include <pipe/p_defines.h>
 #include <math.h>
+#include <util/u_debug.h>
 
 
 VdpStatus
@@ -60,6 +61,8 @@ vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chrom
    struct vl_screen *vlscreen;
    uint32_t max_2d_texture_level;
    VdpStatus ret;
+   
+   debug_printf("[VDPAU] Querying video surfaces\n");
 
    if (!(is_supported && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
@@ -102,6 +105,8 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
 {
 	struct vl_screen *vlscreen;
 	
+	debug_printf("[VDPAU] Querying get put video surfaces\n");
+	
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
@@ -113,7 +118,7 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
    if (!vlscreen)
       return VDP_STATUS_RESOURCES;
 
-   if (bits_ycbcr_format != VDP_YCBCR_FORMAT_Y8U8V8A8) 
+   if (bits_ycbcr_format != VDP_YCBCR_FORMAT_Y8U8V8A8 && bits_ycbcr_format != VDP_YCBCR_FORMAT_V8U8Y8A8) 
 	                               *is_supported = vlscreen->pscreen->is_format_supported(vlscreen->pscreen,
                                    FormatToPipe(bits_ycbcr_format),
                                    PIPE_TEXTURE_2D,
@@ -135,6 +140,8 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
    uint32_t max_decode_height;
    uint32_t max_2d_texture_level;
    struct vl_screen *vlscreen;
+   
+   debug_printf("[VDPAU] Querying decoder\n");
 	
    if (!(is_supported && max_level && max_macroblocks && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
@@ -178,9 +185,11 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
 VdpStatus
 vlVdpOutputSurfaceQueryCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
                                     VdpBool *is_supported, uint32_t *max_width, uint32_t *max_height)
-{
+{	
    if (!(is_supported && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
+	  
+   debug_printf("[VDPAU] Querying ouput surfaces\n");
 
    return VDP_STATUS_NO_IMPLEMENTATION;
 }
@@ -189,6 +198,8 @@ VdpStatus
 vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
                                                     VdpBool *is_supported)
 {
+   debug_printf("[VDPAU] Querying output surfaces get put native cap\n");
+	
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
@@ -200,6 +211,7 @@ vlVdpOutputSurfaceQueryPutBitsYCbCrCapabilities(VdpDevice device, VdpRGBAFormat
                                                 VdpYCbCrFormat bits_ycbcr_format,
                                                 VdpBool *is_supported)
 {
+   debug_printf("[VDPAU] Querying output surfaces put ycrcb cap\n");
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
@@ -210,6 +222,7 @@ VdpStatus
 vlVdpBitmapSurfaceQueryCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
                                     VdpBool *is_supported, uint32_t *max_width, uint32_t *max_height)
 {
+   debug_printf("[VDPAU] Querying bitmap surfaces\n");
    if (!(is_supported && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
 
@@ -220,6 +233,7 @@ VdpStatus
 vlVdpVideoMixerQueryFeatureSupport(VdpDevice device, VdpVideoMixerFeature feature,
                                    VdpBool *is_supported)
 {
+   debug_printf("[VDPAU] Querying mixer feature support\n");
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
diff --git a/src/gallium/state_trackers/vdpau/render.c b/src/gallium/state_trackers/vdpau/render.c
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index 18fe788f870..89437c89e44 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -30,6 +30,7 @@
 #include <pipe/p_state.h>
 #include <util/u_memory.h>
 #include <util/u_format.h>
+#include <stdio.h>
 
 VdpStatus
 vlVdpVideoSurfaceCreate(VdpDevice device,
@@ -38,6 +39,8 @@ vlVdpVideoSurfaceCreate(VdpDevice device,
 			uint32_t height, 
 			VdpVideoSurface *surface)
 {
+	printf("[VDPAU] Creating a surface\n");
+	
     vlVdpSurface *p_surf;
     VdpStatus ret;
 
-- 
cgit v1.2.3


From 966b836e2d5142e01b0286c864ca4a6f1be5b706 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@io.dk>
Date: Sun, 1 Aug 2010 11:10:19 +0200
Subject: Stubs for the bitstream mpeg2 decoder

---
 .../state_trackers/vdpau/mpeg2_bitstream_parser.c  |   0
 .../state_trackers/vdpau/mpeg2_bitstream_parser.h  |  33 ++++
 src/gallium/state_trackers/vdpau/surface.c         | 192 ---------------------
 3 files changed, 33 insertions(+), 192 deletions(-)
 create mode 100644 src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
 create mode 100644 src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
 delete mode 100644 src/gallium/state_trackers/vdpau/surface.c

diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
new file mode 100644
index 00000000000..85a4b2fdf01
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
@@ -0,0 +1,33 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef MPEG2_BITSTREAM_PARSER_H
+#define MPEG2_BITSTREAM_PARSER_H
+
+
+
+#endif // MPEG2_BITSTREAM_PARSER_H
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
deleted file mode 100644
index 89437c89e44..00000000000
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010 Thomas Balling Sørensen.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "vdpau_private.h"
-#include <pipe/p_screen.h>
-#include <pipe/p_state.h>
-#include <util/u_memory.h>
-#include <util/u_format.h>
-#include <stdio.h>
-
-VdpStatus
-vlVdpVideoSurfaceCreate(VdpDevice device,
-			VdpChromaType chroma_type, 
-			uint32_t width, 
-			uint32_t height, 
-			VdpVideoSurface *surface)
-{
-	printf("[VDPAU] Creating a surface\n");
-	
-    vlVdpSurface *p_surf;
-    VdpStatus ret;
-
-    if (!(width && height))
-      {
-         ret = VDP_STATUS_INVALID_SIZE;
-         goto inv_size;
-      }
-      
-
-    if (!vlCreateHTAB()) {
-       ret = VDP_STATUS_RESOURCES;
-       goto no_htab;
-    }
-
-   p_surf = CALLOC(1, sizeof(p_surf));
-   if (!p_surf) {
-      ret = VDP_STATUS_RESOURCES;
-      goto no_res;
-   }
-
-   vlVdpDevice *dev = vlGetDataHTAB(device);
-   if (!dev)  {
-      ret = VDP_STATUS_INVALID_HANDLE;
-      goto inv_device;
-   }
-
-   p_surf->chroma_format = FormatToPipe(chroma_type);
-   p_surf->device = dev;
-    
-   *surface = vlAddDataHTAB(p_surf);
-   if (*surface == 0) {
-      ret = VDP_STATUS_ERROR;
-      goto no_handle;
-   }
-   
-   return VDP_STATUS_OK;
-
-no_handle:
-   FREE(p_surf->psurface);
-inv_device:
-no_surf:
-   FREE(p_surf);
-no_res:
-   // vlDestroyHTAB(); XXX: Do not destroy this tab, I think.
-no_htab:
-inv_size:
-   return ret;
-}
-
-VdpStatus
-vlVdpVideoSurfaceDestroy  ( VdpVideoSurface surface )
-{
-   vlVdpSurface *p_surf;
-
-   p_surf = (vlVdpSurface *)vlGetDataHTAB((vlHandle)surface);
-   if (!p_surf)
-       return VDP_STATUS_INVALID_HANDLE;
-
-   if (p_surf->psurface)  {
-	   if (p_surf->psurface->texture)  {
-		   if (p_surf->psurface->texture->screen)
-				p_surf->psurface->texture->screen->tex_surface_destroy(p_surf->psurface);
-	   }
-   }
-   FREE(p_surf);
-   return VDP_STATUS_OK;
-}
-
-VdpStatus
-vlVdpVideoSurfaceGetParameters ( VdpVideoSurface surface, 
-				 VdpChromaType *chroma_type, 
-				 uint32_t *width, 
-				 uint32_t *height
-)
-{
-   if (!(width && height && chroma_type))  
-      return VDP_STATUS_INVALID_POINTER; 
-   
-   
-   vlVdpSurface *p_surf = vlGetDataHTAB(surface);
-   if (!p_surf) 
-      return VDP_STATUS_INVALID_HANDLE;
-
-
-   if (!(p_surf->chroma_format > 0 && p_surf->chroma_format < 3))  
-      return VDP_STATUS_INVALID_CHROMA_TYPE;
-
-   *width = p_surf->width;
-   *height = p_surf->height;
-   *chroma_type = PipeToType(p_surf->chroma_format);
-
-   return VDP_STATUS_OK;
-}
-
-VdpStatus
-vlVdpVideoSurfaceGetBitsYCbCr ( VdpVideoSurface surface, 
-				VdpYCbCrFormat destination_ycbcr_format, 
-				void *const *destination_data, 
-				uint32_t const *destination_pitches
-)
-{
-    if (!vlCreateHTAB()) 
-      return VDP_STATUS_RESOURCES;
-
-
-    vlVdpSurface *p_surf = vlGetDataHTAB(surface);
-    if (!p_surf) 
-       return VDP_STATUS_INVALID_HANDLE;
-	  
-	if (!p_surf->psurface)
-		return VDP_STATUS_RESOURCES;
-   
-   
-	return VDP_STATUS_OK;
-}
-
-VdpStatus
-vlVdpVideoSurfacePutBitsYCbCr ( VdpVideoSurface surface, 
-								VdpYCbCrFormat source_ycbcr_format, 
-								void const *const *source_data, 
-								uint32_t const *source_pitches
-)
-{
-	uint32_t size_surface_bytes;
-	const struct util_format_description *format_desc;
-	enum pipe_format pformat = FormatToPipe(source_ycbcr_format);
-	
-	if (!vlCreateHTAB()) 
-      return VDP_STATUS_RESOURCES;
-
-
-    vlVdpSurface *p_surf = vlGetDataHTAB(surface);
-    if (!p_surf) 
-       return VDP_STATUS_INVALID_HANDLE;
-	   
-	
-	//size_surface_bytes =  ( source_pitches[0] * p_surf->height util_format_get_blockheight(pformat) );   
-	/*util_format_translate(enum pipe_format dst_format,
-                      void *dst, unsigned dst_stride,
-                      unsigned dst_x, unsigned dst_y,
-                      enum pipe_format src_format,
-                      const void *src, unsigned src_stride,
-                      unsigned src_x, unsigned src_y,
-                      unsigned width, unsigned height);*/
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
-
-}
-- 
cgit v1.2.3


From 09a10be4db1e5605cb93a6e54d1475d4ebbaa3c0 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@io.dk>
Date: Wed, 4 Aug 2010 11:07:26 +0200
Subject: Fixed an endian problem

---
 src/gallium/auxiliary/vl/vl_bitstream_parser.c | 47 ++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.c b/src/gallium/auxiliary/vl/vl_bitstream_parser.c
index 3193ea5f41c..f07b3443b92 100644
--- a/src/gallium/auxiliary/vl/vl_bitstream_parser.c
+++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.c
@@ -29,17 +29,58 @@
 #include <assert.h>
 #include <limits.h>
 #include <util/u_memory.h>
+#include <stdio.h>
+
+/* Exchange the two bytes of *x in place (assumes a 16-bit unsigned short). */
+inline void endian_swap_ushort(unsigned short *x)
+{
+    *x = (unsigned short)((*x << 8) | (*x >> 8));
+}
+
+/* Reverse the byte order of *x in place (assumes a 32-bit unsigned int). */
+inline void endian_swap_uint(unsigned int *x)
+{
+    const unsigned int v = *x;
+
+    *x = (v << 24) | ((v << 8) & 0x00FF0000) | ((v >> 8) & 0x0000FF00) | (v >> 24);
+}
+
+/* Reverse the byte order of *x in place (assumes a 64-bit unsigned long long). */
+inline void endian_swap_ulonglong(unsigned long long *x)
+{
+    const unsigned long long v = *x;
+
+    /* Each line moves one byte and its mirror image to swapped positions. */
+    *x = (v << 56) | (v >> 56) |
+        ((v << 40) & 0x00FF000000000000) | ((v >> 40) & 0x000000000000FF00) |
+        ((v << 24) & 0x0000FF0000000000) | ((v >> 24) & 0x0000000000FF0000) |
+        ((v << 8)  & 0x000000FF00000000) | ((v >> 8)  & 0x00000000FF000000);
+}
 
 static unsigned
 grab_bits(unsigned cursor, unsigned how_many_bits, unsigned bitstream_elt)
 {
-   unsigned excess_bits = sizeof(unsigned) * CHAR_BIT - how_many_bits - cursor;
+   unsigned excess_bits = sizeof(unsigned) * CHAR_BIT - how_many_bits;
 	
    assert(cursor < sizeof(unsigned) * CHAR_BIT);
    assert(how_many_bits > 0 && how_many_bits <= sizeof(unsigned) * CHAR_BIT);
    assert(cursor + how_many_bits <= sizeof(unsigned) * CHAR_BIT);
-
-   return (bitstream_elt << excess_bits) >> (excess_bits + cursor);
+   
+   #ifndef PIPE_ARCH_BIG_ENDIAN 
+   switch (sizeof(unsigned))  {
+	   case 2:
+			endian_swap_ushort(&bitstream_elt);
+			break;
+	   case 4:
+			endian_swap_uint(&bitstream_elt);
+			break;
+	   case 8:
+			endian_swap_ulonglong(&bitstream_elt);
+			break;
+   }
+   #endif // !PIPE_ARCH_BIG_ENDIAN 
+   
+	return (bitstream_elt << cursor) >> (excess_bits);
 }
 
 static unsigned
-- 
cgit v1.2.3


From 5386a8a2e012aafa8a2a02df83e2c4c19ec1f8f5 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Tue, 21 Sep 2010 15:23:52 +0200
Subject: vl: Various cleanups. Need to start from scratch with bitstream
 parser

---
 src/gallium/state_trackers/vdpau/header.c | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/gallium/state_trackers/vdpau/header.c

diff --git a/src/gallium/state_trackers/vdpau/header.c b/src/gallium/state_trackers/vdpau/header.c
new file mode 100644
index 00000000000..e69de29bb2d
-- 
cgit v1.2.3


From c5b6f7d16699cfda696538890a9c1744847bb434 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Tue, 21 Sep 2010 19:20:00 +0200
Subject: vl: Made the project compile again.

---
 src/gallium/state_trackers/vdpau/Makefile        |   4 +-
 src/gallium/state_trackers/vdpau/surface.c       | 192 +++++++++++++++++++++++
 src/gallium/state_trackers/vdpau/vdpau_private.h |  21 ++-
 3 files changed, 213 insertions(+), 4 deletions(-)
 create mode 100644 src/gallium/state_trackers/vdpau/surface.c

diff --git a/src/gallium/state_trackers/vdpau/Makefile b/src/gallium/state_trackers/vdpau/Makefile
index a1b83abc6dd..6313ef34b38 100644
--- a/src/gallium/state_trackers/vdpau/Makefile
+++ b/src/gallium/state_trackers/vdpau/Makefile
@@ -16,7 +16,9 @@ C_SOURCES = htab.c \
 	    device.c \
 	    query.c \
 	    surface.c \
-	    decode.c
+	    decode.c \
+	    presentation.c \
+	    bitmap.c
 
 
 include ../../Makefile.template
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
new file mode 100644
index 00000000000..f957d94bdf7
--- /dev/null
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -0,0 +1,192 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vdpau_private.h"
+#include <pipe/p_screen.h>
+#include <pipe/p_state.h>
+#include <util/u_memory.h>
+#include <util/u_format.h>
+#include <stdio.h>
+
+/* Allocate a vlVdpSurface for `device` and hand back its handle in *surface. */
+VdpStatus
+vlVdpVideoSurfaceCreate(VdpDevice device,
+ VdpChromaType chroma_type,
+ uint32_t width,
+ uint32_t height,
+ VdpVideoSurface *surface)
+{
+ printf("[VDPAU] Creating a surface\n");
+
+ vlVdpSurface *p_surf;
+ VdpStatus ret;
+
+ if (!(width && height)) {
+ ret = VDP_STATUS_INVALID_SIZE;
+ goto inv_size;
+ }
+
+ if (!vlCreateHTAB()) {
+ ret = VDP_STATUS_RESOURCES;
+ goto no_htab;
+ }
+
+ p_surf = CALLOC(1, sizeof(*p_surf)); /* size of the struct, not of the pointer */
+ if (!p_surf) {
+ ret = VDP_STATUS_RESOURCES;
+ goto no_res;
+ }
+
+ vlVdpDevice *dev = vlGetDataHTAB(device);
+ if (!dev) {
+ ret = VDP_STATUS_INVALID_HANDLE;
+ goto inv_device;
+ }
+
+ p_surf->chroma_format = FormatToPipe(chroma_type);
+ p_surf->device = dev;
+ p_surf->width = width; /* read back by vlVdpVideoSurfaceGetParameters */
+ p_surf->height = height;
+
+ *surface = vlAddDataHTAB(p_surf);
+ if (*surface == 0) {
+ ret = VDP_STATUS_ERROR;
+ goto no_handle;
+ }
+
+ return VDP_STATUS_OK;
+
+no_handle:
+ FREE(p_surf->psurface);
+inv_device:
+ FREE(p_surf);
+no_res:
+ // vlDestroyHTAB(); XXX: Do not destroy this tab, I think.
+no_htab:
+inv_size:
+ return ret;
+}
+
+/* Destroy the pipe surface behind `surface` (if any) and free its record. */
+VdpStatus
+vlVdpVideoSurfaceDestroy ( VdpVideoSurface surface )
+{
+ vlVdpSurface *vlsurf = (vlVdpSurface *)vlGetDataHTAB((vlHandle)surface);
+
+ if (vlsurf == NULL)
+ return VDP_STATUS_INVALID_HANDLE;
+
+ /* NOTE(review): the handle is not removed from the handle table here, */
+ /* so a later lookup of `surface` may return freed memory -- confirm. */
+ if (vlsurf->psurface && vlsurf->psurface->texture &&
+ vlsurf->psurface->texture->screen)
+ vlsurf->psurface->texture->screen->tex_surface_destroy(vlsurf->psurface);
+
+ FREE(vlsurf);
+ return VDP_STATUS_OK;
+}
+
+/* Report the chroma type and dimensions recorded for a video surface. */
+VdpStatus
+vlVdpVideoSurfaceGetParameters ( VdpVideoSurface surface,
+ VdpChromaType *chroma_type,
+ uint32_t *width,
+ uint32_t *height
+)
+{
+ vlVdpSurface *vlsurf;
+
+ if (!width || !height || !chroma_type)
+ return VDP_STATUS_INVALID_POINTER;
+
+ vlsurf = vlGetDataHTAB(surface);
+ if (vlsurf == NULL)
+ return VDP_STATUS_INVALID_HANDLE;
+
+ /* Only chroma_format values 1 and 2 pass this range check. */
+ if (vlsurf->chroma_format <= 0 || vlsurf->chroma_format >= 3)
+ return VDP_STATUS_INVALID_CHROMA_TYPE;
+ *width = vlsurf->width;
+ *height = vlsurf->height;
+ *chroma_type = PipeToType(vlsurf->chroma_format);
+ return VDP_STATUS_OK;
+}
+
+/* Stub: checks the handle and its backing pipe surface; copies no pixel data. */
+VdpStatus
+vlVdpVideoSurfaceGetBitsYCbCr ( VdpVideoSurface surface,
+ VdpYCbCrFormat destination_ycbcr_format,
+ void *const *destination_data,
+ uint32_t const *destination_pitches
+)
+{
+ vlVdpSurface *vlsurf;
+
+ if (!vlCreateHTAB())
+ return VDP_STATUS_RESOURCES;
+
+ vlsurf = vlGetDataHTAB(surface);
+ if (vlsurf == NULL)
+ return VDP_STATUS_INVALID_HANDLE;
+
+ /* NOTE(review): destination_data is never written, yet VDP_STATUS_OK is */
+ /* returned when a pipe surface exists -- confirm callers tolerate this. */
+ return vlsurf->psurface ? VDP_STATUS_OK : VDP_STATUS_RESOURCES;
+}
+
+/*
+ * Upload YCbCr data into a video surface. Not implemented: once the handle
+ * resolves, this always returns VDP_STATUS_NO_IMPLEMENTATION.
+ */
+VdpStatus
+vlVdpVideoSurfacePutBitsYCbCr ( VdpVideoSurface surface,
+ VdpYCbCrFormat source_ycbcr_format,
+ void const *const *source_data,
+ uint32_t const *source_pitches
+)
+{
+ /* Not consumed yet; kept for the planned util_format_translate() upload. */
+ enum pipe_format pformat = FormatToPipe(source_ycbcr_format);
+
+ if (!vlCreateHTAB())
+ return VDP_STATUS_RESOURCES;
+
+ vlVdpSurface *p_surf = vlGetDataHTAB(surface);
+ if (!p_surf)
+ return VDP_STATUS_INVALID_HANDLE;
+
+ /*
+ * TODO: copy source_data into the surface. The call shape sketched here:
+ * util_format_translate(dst_format, dst, dst_stride, dst_x, dst_y,
+ * src_format, src, src_stride, src_x, src_y,
+ * width, height)
+ * with the byte size derived from source_pitches[0] and p_surf->height.
+ */
+
+ return VDP_STATUS_NO_IMPLEMENTATION;
+
+}
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 566c99266ed..635d6c8acdb 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -161,8 +161,9 @@ typedef struct
 {
 	vlVdpDevice *device;
 	struct vl_screen *vlscreen;
-    struct vl_context *vctx;
+    	struct vl_context *vctx;
 	enum pipe_video_chroma_format chroma_format;
+	enum pipe_video_profile profile;
 } vlVdpDecoder;
 
 typedef uint32_t vlHandle;
@@ -173,6 +174,7 @@ vlHandle vlAddDataHTAB(void *data);
 void* vlGetDataHTAB(vlHandle handle);
 boolean vlGetFuncFTAB(VdpFuncId function_id, void **func);
 
+VdpGetErrorString vlVdpGetErrorString;
 VdpDeviceDestroy vlVdpDeviceDestroy;
 VdpGetProcAddress vlVdpGetProcAddress;
 VdpGetApiVersion vlVdpGetApiVersion;
@@ -197,5 +199,18 @@ VdpVideoSurfacePutBitsYCbCr vlVdpVideoSurfacePutBitsYCbCr;
 VdpDecoderCreate vlVdpDecoderCreate;
 VdpDecoderDestroy vlVdpDecoderDestroy;
 VdpDecoderRender vlVdpDecoderRender;
-
-#endif // VDPAU_PRIVATE_H
\ No newline at end of file
+VdpOutputSurfaceCreate vlVdpOutputSurfaceCreate;
+VdpBitmapSurfaceCreate vlVdpBitmapSurfaceCreate;
+VdpBitmapSurfaceDestroy vlVdpBitmapSurfaceDestroy;
+VdpBitmapSurfaceGetParameters vlVdpBitmapSurfaceGetParameters;
+VdpBitmapSurfacePutBitsNative vlVdpBitmapSurfacePutBitsNative;
+VdpPresentationQueueTargetDestroy vlVdpPresentationQueueTargetDestroy;
+VdpPresentationQueueCreate vlVdpPresentationQueueCreate;
+VdpPresentationQueueDestroy vlVdpPresentationQueueDestroy;
+VdpPresentationQueueSetBackgroundColor vlVdpPresentationQueueSetBackgroundColor;
+VdpPresentationQueueGetBackgroundColor vlVdpPresentationQueueGetBackgroundColor;
+VdpPresentationQueueGetTime vlVdpPresentationQueueGetTime;
+VdpPresentationQueueDisplay vlVdpPresentationQueueDisplay;
+VdpPresentationQueueBlockUntilSurfaceIdle vlVdpPresentationQueueBlockUntilSurfaceIdle;
+VdpPresentationQueueQuerySurfaceStatus vlVdpPresentationQueueQuerySurfaceStatus;
+#endif // VDPAU_PRIVATE_H
-- 
cgit v1.2.3


From a90bdd09b6b342c3ff8d2c80480805f9614fabb3 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Tue, 21 Sep 2010 19:44:30 +0200
Subject: vl: Made vdpauinfo run again

---
 src/gallium/state_trackers/vdpau/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/vdpau/Makefile b/src/gallium/state_trackers/vdpau/Makefile
index 6313ef34b38..ae54ae6a7ef 100644
--- a/src/gallium/state_trackers/vdpau/Makefile
+++ b/src/gallium/state_trackers/vdpau/Makefile
@@ -18,7 +18,8 @@ C_SOURCES = htab.c \
 	    surface.c \
 	    decode.c \
 	    presentation.c \
-	    bitmap.c
+	    bitmap.c \
+	    output.c
 
 
 include ../../Makefile.template
-- 
cgit v1.2.3


From 8291db1cdb9d8e8d02a9c1a7ce34e6a23b8238ff Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Mon, 27 Sep 2010 22:45:05 +0200
Subject: vl: Renamed function to appropriate name.

---
 src/gallium/state_trackers/vdpau/decode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index ec3995b98db..e03bc35ed68 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -166,7 +166,7 @@ vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
 }
 
 static void
-vlVdpMacroBlocksToPipe(struct pipe_screen *screen,
+vlVdpBitstreamToMacroblocks(struct pipe_screen *screen,
                   VdpBitstreamBuffer const *bitstream_buffers,
                   unsigned int num_macroblocks,
                   struct pipe_mpeg12_macroblock *pipe_macroblocks)
@@ -221,7 +221,7 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 	num_macroblocks = bitstream_buffer_count;
 	struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
 	
-	vlVdpMacroBlocksToPipe(vpipe->screen, bitstream_buffers,
+	vlVdpBitstreamToMacroblocks(vpipe->screen, bitstream_buffers,
                      num_macroblocks, pipe_macroblocks);
 		
 	vpipe->set_decode_target(vpipe,t_surf);
-- 
cgit v1.2.3


From cac5e60fd3fa7b756bcd4174db8096335c70e145 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Thu, 30 Sep 2010 15:58:57 +0200
Subject: vl: moved some functions to more appropriate places

---
 src/gallium/state_trackers/vdpau/Makefile          |  1 +
 src/gallium/state_trackers/vdpau/decode.c          | 24 ++++---------
 .../state_trackers/vdpau/mpeg2_bitstream_parser.c  | 42 ++++++++++++++++++++++
 .../state_trackers/vdpau/mpeg2_bitstream_parser.h  |  8 +++++
 4 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/Makefile b/src/gallium/state_trackers/vdpau/Makefile
index ae54ae6a7ef..ad37676b95e 100644
--- a/src/gallium/state_trackers/vdpau/Makefile
+++ b/src/gallium/state_trackers/vdpau/Makefile
@@ -19,6 +19,7 @@ C_SOURCES = htab.c \
 	    decode.c \
 	    presentation.c \
 	    bitmap.c \
+	    mpeg2_bitstream_parser.c \
 	    output.c
 
 
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index e03bc35ed68..3e7cb4a3cab 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 #include "vdpau_private.h"
+#include "mpeg2_bitstream_parser.h"
 #include <util/u_memory.h>
 #include <util/u_math.h>
 #include <pipe/p_video_context.h>
@@ -165,15 +166,6 @@ vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
 	return VDP_STATUS_OK;
 }
 
-static void
-vlVdpBitstreamToMacroblocks(struct pipe_screen *screen,
-                  VdpBitstreamBuffer const *bitstream_buffers,
-                  unsigned int num_macroblocks,
-                  struct pipe_mpeg12_macroblock *pipe_macroblocks)
-{
-	debug_printf("NAF!\n");
-}
-
 VdpStatus
 vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 							vlVdpSurface *vlsurf,
@@ -190,6 +182,7 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 	struct pipe_surface *p_surf;
 	struct pipe_surface *f_surf;
 	uint32_t num_macroblocks;
+	struct pipe_mpeg12_macroblock *pipe_macroblocks;
 	VdpStatus ret;
 	
 
@@ -217,15 +210,12 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 	if (f_vdp_surf ==  VDP_INVALID_HANDLE) f_vdp_surf = NULL;
 	
 	ret = vlVdpCreateSurfaceTarget(vldecoder,t_vdp_surf);
-		
-	num_macroblocks = bitstream_buffer_count;
-	struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
-	
-	vlVdpBitstreamToMacroblocks(vpipe->screen, bitstream_buffers,
-                     num_macroblocks, pipe_macroblocks);
+
+	vlVdpBitstreamToMacroblock(vpipe->screen, bitstream_buffers,
+                     &num_macroblocks, &pipe_macroblocks);
 		
 	vpipe->set_decode_target(vpipe,t_surf);
-	vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, &pipe_macroblocks->base, NULL);
+	vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, pipe_macroblocks, NULL);
 	return ret;
 }
 
@@ -284,4 +274,4 @@ vlVdpDecoderRender (VdpDecoder decoder,
 	assert(0);
 
 	return ret;
-}
\ No newline at end of file
+}
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
index e69de29bb2d..c6d5846be52 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
@@ -0,0 +1,42 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "mpeg2_bitstream_parser.h"
+
+void
+vlVdpBitstreamToMacroblock (
+		  struct pipe_screen *screen,
+                  VdpBitstreamBuffer const *bitstream_buffers,
+                  unsigned int *num_macroblocks,
+                  struct pipe_mpeg12_macroblock **pipe_macroblocks)
+{
+	debug_printf("[VDPAU] BitstreamToMacroblock not implemented yet");
+	assert(0);
+
+	return;
+}
+
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
index 85a4b2fdf01..534503df53f 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
@@ -28,6 +28,14 @@
 #ifndef MPEG2_BITSTREAM_PARSER_H
 #define MPEG2_BITSTREAM_PARSER_H
 
+#include <vdpau/vdpau.h>
+#include <pipe/p_video_state.h>
+#include "vdpau_private.h"
 
+void
+vlVdpBitstreamToMacroblock(struct pipe_screen *screen,
+                  VdpBitstreamBuffer const *bitstream_buffers,
+                  unsigned int *num_macroblocks,
+                  struct pipe_mpeg12_macroblock **pipe_macroblocks);
 
 #endif // MPEG2_BITSTREAM_PARSER_H
-- 
cgit v1.2.3


From 63b1525cf0a50e3d31328c3b56355a86056e4c05 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Tue, 5 Oct 2010 11:06:02 +0200
Subject: vl: ...

---
 src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
index c6d5846be52..39019660edd 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
@@ -30,13 +30,15 @@
 void
 vlVdpBitstreamToMacroblock (
 		  struct pipe_screen *screen,
-                  VdpBitstreamBuffer const *bitstream_buffers,
-                  unsigned int *num_macroblocks,
-                  struct pipe_mpeg12_macroblock **pipe_macroblocks)
+		  VdpBitstreamBuffer const *bitstream_buffers,
+          unsigned int *num_macroblocks,
+          struct pipe_mpeg12_macroblock **pipe_macroblocks)
 {
 	debug_printf("[VDPAU] BitstreamToMacroblock not implemented yet");
 	assert(0);
 
+	
+
 	return;
 }
 
-- 
cgit v1.2.3


From d64d6f7712e5e8d8f962de3455a71fce8b2a8f78 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Tue, 5 Oct 2010 14:25:29 +0200
Subject: vl: changed video pipe to use the new gallium API within master

---
 src/gallium/auxiliary/vl/vl_bitstream_parser.h   |   4 +
 src/gallium/auxiliary/vl/vl_compositor.c         |   4 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  30 +--
 src/gallium/drivers/softpipe/sp_video_context.c  |  52 ++---
 src/gallium/include/pipe/p_video_context.h       |  18 +-
 src/gallium/state_trackers/vdpau/query.c         |   1 +
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c       |   2 +-
 src/glsl/glcpp/glcpp-parse.c                     | 234 +++++++++++------------
 src/glsl/glcpp/glcpp-parse.h                     |   7 +-
 9 files changed, 179 insertions(+), 173 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.h b/src/gallium/auxiliary/vl/vl_bitstream_parser.h
index 30ec743fa75..eeb51dd4295 100644
--- a/src/gallium/auxiliary/vl/vl_bitstream_parser.h
+++ b/src/gallium/auxiliary/vl/vl_bitstream_parser.h
@@ -39,6 +39,10 @@ struct vl_bitstream_parser
    unsigned cursor;
 };
 
+inline void endian_swap_ushort(unsigned short *x);
+inline void endian_swap_uint(unsigned int *x);
+inline void endian_swap_ulonglong(unsigned long long *x);
+
 bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser,
                               unsigned num_bitstreams,
                               const void **bitstreams,
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 415dc92555f..ee7bf070037 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -555,7 +555,9 @@ static void draw_layers(struct vl_compositor *c,
 
       c->pipe->bind_fs_state(c->pipe, frag_shaders[i]);
       c->pipe->set_fragment_sampler_views(c->pipe, 1, &surface_view);
-      c->pipe->draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
+	  
+
+	  util_draw_arrays(c->pipe,PIPE_PRIM_TRIANGLES,i * 6,6);
 
       if (delete_view) {
          pipe_sampler_view_reference(&surface_view, NULL);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index e9024e4a409..8a8c155e8ec 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1039,6 +1039,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    unsigned vb_start = 0;
    struct vertex_shader_consts *vs_consts;
    struct pipe_transfer *buf_transfer;
+   
    unsigned i;
 
    assert(r);
@@ -1065,6 +1066,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
                                 r->vs_const_buf);
+								
 
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
@@ -1074,8 +1076,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->i_vs);
       r->pipe->bind_fs_state(r->pipe, r->i_fs);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
+	  util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
+	  
       vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
    }
 
@@ -1089,8 +1091,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
+	  util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
+
       vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
    }
 
@@ -1104,8 +1106,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
+	  util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
+
       vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
    }
 
@@ -1119,8 +1121,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
+      util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
+
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
    }
 
@@ -1134,8 +1136,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
+      util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
+
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
    }
 
@@ -1151,8 +1153,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
       r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
+      util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
+
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
    }
 
@@ -1168,8 +1170,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
       r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
+      util_draw_arrays(r->pipe,PIPE_PRIM_TRIANGLES,vb_start,num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
+
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
    }
 
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 44df00e0b78..419ba946b89 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -33,6 +33,7 @@
 #include <util/u_memory.h>
 #include <util/u_rect.h>
 #include <util/u_video.h>
+#include <util/u_surface.h>
 #include "sp_public.h"
 #include "sp_texture.h"
 
@@ -97,8 +98,8 @@ sp_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
    if (geom & PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)
       return FALSE;
 
-   return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, PIPE_TEXTURE_2D,
-                                                 format, usage, geom);
+   return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, format, PIPE_TEXTURE_2D, 1,
+                                                  usage, geom);
 }
 
 static void
@@ -125,29 +126,31 @@ sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
 }
 
 static void
-sp_mpeg12_surface_fill(struct pipe_video_context *vpipe,
+sp_mpeg12_clear_render_target(struct pipe_video_context *vpipe,
                        struct pipe_surface *dst,
                        unsigned dstx, unsigned dsty,
-                       unsigned width, unsigned height,
-                       unsigned value)
+					   const float *rgba,
+                       unsigned width, unsigned height)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
 
    assert(vpipe);
    assert(dst);
 
-   if (ctx->pipe->surface_fill)
-      ctx->pipe->surface_fill(ctx->pipe, dst, dstx, dsty, width, height, value);
+   if (ctx->pipe->clear_render_target)
+      ctx->pipe->clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
    else
-      util_surface_fill(ctx->pipe, dst, dstx, dsty, width, height, value);
+      util_clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
 }
 
 static void
-sp_mpeg12_surface_copy(struct pipe_video_context *vpipe,
-                       struct pipe_surface *dst,
-                       unsigned dstx, unsigned dsty,
-                       struct pipe_surface *src,
-                       unsigned srcx, unsigned srcy,
+sp_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
+                       struct pipe_resource *dst,
+					   struct pipe_subresource subdst,
+                       unsigned dstx, unsigned dsty, unsigned dstz,
+                       struct pipe_resource *src,
+					   struct pipe_subresource subsrc,
+                       unsigned srcx, unsigned srcy, unsigned srcz,
                        unsigned width, unsigned height)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
@@ -155,10 +158,10 @@ sp_mpeg12_surface_copy(struct pipe_video_context *vpipe,
    assert(vpipe);
    assert(dst);
 
-   if (ctx->pipe->surface_copy)
-      ctx->pipe->surface_copy(ctx->pipe, dst, dstx, dsty, src, srcx, srcy, width, height);
+   if (ctx->pipe->resource_copy_region)
+      ctx->pipe->resource_copy_region(ctx->pipe, dst, subdst, dstx, dsty, dstz, src, subsrc, srcx, srcy, srcz, width, height);
    else
-      util_surface_copy(ctx->pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, width, height);
+      util_resource_copy_region(ctx->pipe, dst, subdst, dstx, dsty, dstz, src, subsrc, srcx, srcy, srcz, width, height);
 }
 
 static struct pipe_transfer*
@@ -339,12 +342,9 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    rast.flatshade = 1;
    rast.flatshade_first = 0;
    rast.light_twoside = 0;
-   rast.front_winding = PIPE_WINDING_CCW;
-   rast.cull_mode = PIPE_WINDING_CW;
-   rast.fill_cw = PIPE_POLYGON_MODE_FILL;
-   rast.fill_ccw = PIPE_POLYGON_MODE_FILL;
-   rast.offset_cw = 0;
-   rast.offset_ccw = 0;
+   rast.cull_face = PIPE_FACE_FRONT;
+   rast.fill_front = PIPE_POLYGON_MODE_FILL;
+   rast.fill_back = PIPE_POLYGON_MODE_FILL;
    rast.scissor = 0;
    rast.poly_smooth = 0;
    rast.poly_stipple_enable = 0;
@@ -359,13 +359,15 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    rast.line_width = 1;
    rast.point_smooth = 0;
    rast.point_quad_rasterization = 0;
-   rast.point_size = 1;
+   rast.point_size_per_vertex = 1;
    rast.offset_units = 1;
    rast.offset_scale = 1;
    rast.gl_rasterization_rules = 1;
+   
    ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
    ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
 
+
    blend.independent_blend_enable = 0;
    blend.rt[0].blend_enable = 0;
    blend.rt[0].rgb_func = PIPE_BLEND_ADD;
@@ -432,8 +434,8 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
    ctx->base.is_format_supported = sp_mpeg12_is_format_supported;
    ctx->base.decode_macroblocks = sp_mpeg12_decode_macroblocks;
    ctx->base.render_picture = sp_mpeg12_render_picture;
-   ctx->base.surface_fill = sp_mpeg12_surface_fill;
-   ctx->base.surface_copy = sp_mpeg12_surface_copy;
+   ctx->base.clear_render_target = sp_mpeg12_clear_render_target;
+   ctx->base.resource_copy_region = sp_mpeg12_resource_copy_region;
    ctx->base.get_transfer = sp_mpeg12_get_transfer;
    ctx->base.transfer_destroy = sp_mpeg12_transfer_destroy;
    ctx->base.transfer_map = sp_mpeg12_transfer_map;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 294dc464c36..21ed4d579cf 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -101,17 +101,19 @@ struct pipe_video_context
                           struct pipe_video_rect        *dst_area,
                           struct pipe_fence_handle      **fence);
 
-   void (*surface_fill)(struct pipe_video_context *vpipe,
+   void (*clear_render_target)(struct pipe_video_context *vpipe,
                         struct pipe_surface *dst,
                         unsigned dstx, unsigned dsty,
-                        unsigned width, unsigned height,
-                        unsigned value);
+						const float *rgba,
+                        unsigned width, unsigned height);
 
-   void (*surface_copy)(struct pipe_video_context *vpipe,
-                        struct pipe_surface *dst,
-                        unsigned dstx, unsigned dsty,
-                        struct pipe_surface *src,
-                        unsigned srcx, unsigned srcy,
+   void (*resource_copy_region)(struct pipe_video_context *vpipe,
+                        struct pipe_resource *dst,
+						struct pipe_subresource subdst,
+                        unsigned dstx, unsigned dsty, unsigned dstz,
+                        struct pipe_resource *src,
+						struct pipe_subresource subsrc,
+                        unsigned srcx, unsigned srcy, unsigned srcz,
                         unsigned width, unsigned height);
 
    struct pipe_transfer *(*get_transfer)(struct pipe_video_context *vpipe,
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
index 86b5098f178..a3a8500a6f7 100644
--- a/src/gallium/state_trackers/vdpau/query.c
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -122,6 +122,7 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
 	                               *is_supported = vlscreen->pscreen->is_format_supported(vlscreen->pscreen,
                                    FormatToPipe(bits_ycbcr_format),
                                    PIPE_TEXTURE_2D,
+								   1,
                                    PIPE_BIND_RENDER_TARGET, 
                                    PIPE_TEXTURE_GEOM_NON_SQUARE );
 								   
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 0a7f324a77c..cc80583f088 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -82,7 +82,7 @@ vl_drawable_surface_get(struct vl_screen *vscreen, Drawable drawable)
    templat.height0 = height;
    templat.depth0 = 1;
    templat.usage = PIPE_USAGE_DEFAULT;
-   templat.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_BLIT_SOURCE;
+   templat.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET;
    templat.flags = 0;
 
    drawable_tex = vscreen->pscreen->resource_create(vscreen->pscreen, &templat);
diff --git a/src/glsl/glcpp/glcpp-parse.c b/src/glsl/glcpp/glcpp-parse.c
index 1773ca5c13d..899e7841b3d 100644
--- a/src/glsl/glcpp/glcpp-parse.c
+++ b/src/glsl/glcpp/glcpp-parse.c
@@ -1,9 +1,10 @@
-/* A Bison parser, made by GNU Bison 2.4.3.  */
+
+/* A Bison parser, made by GNU Bison 2.4.1.  */
 
 /* Skeleton implementation for Bison's Yacc-like parsers in C
    
-      Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-   2009, 2010 Free Software Foundation, Inc.
+      Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Free Software Foundation, Inc.
    
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -45,7 +46,7 @@
 #define YYBISON 1
 
 /* Bison version.  */
-#define YYBISON_VERSION "2.4.3"
+#define YYBISON_VERSION "2.4.1"
 
 /* Skeleton name.  */
 #define YYSKELETON_NAME "yacc.c"
@@ -219,7 +220,7 @@ add_builtin_define(glcpp_parser_t *parser, const char *name, int value);
 
 
 /* Line 189 of yacc.c  */
-#line 223 "glcpp/glcpp-parse.c"
+#line 224 "glcpp/glcpp-parse.c"
 
 /* Enabling traces.  */
 #ifndef YYDEBUG
@@ -307,7 +308,7 @@ typedef struct YYLTYPE
 
 
 /* Line 264 of yacc.c  */
-#line 311 "glcpp/glcpp-parse.c"
+#line 312 "glcpp/glcpp-parse.c"
 
 #ifdef short
 # undef short
@@ -357,7 +358,7 @@ typedef short int yytype_int16;
 #define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
 
 #ifndef YY_
-# if defined YYENABLE_NLS && YYENABLE_NLS
+# if YYENABLE_NLS
 #  if ENABLE_NLS
 #   include <libintl.h> /* INFRINGES ON USER NAME SPACE */
 #   define YY_(msgid) dgettext ("bison-runtime", msgid)
@@ -945,18 +946,9 @@ static const yytype_uint8 yystos[] =
 
 /* Like YYERROR except do call yyerror.  This remains here temporarily
    to ease the transition to the new meaning of YYERROR, for GCC.
-   Once GCC version 2 has supplanted version 1, this can go.  However,
-   YYFAIL appears to be in use.  Nevertheless, it is formally deprecated
-   in Bison 2.4.2's NEWS entry, where a plan to phase it out is
-   discussed.  */
+   Once GCC version 2 has supplanted version 1, this can go.  */
 
 #define YYFAIL		goto yyerrlab
-#if defined YYFAIL
-  /* This is here to suppress warnings from the GCC cpp's
-     -Wunused-macros.  Normally we don't worry about that warning, but
-     some users do, and we want to make it easy for users to remove
-     YYFAIL uses, which will produce warnings from Bison 2.5.  */
-#endif
 
 #define YYRECOVERING()  (!!yyerrstatus)
 
@@ -1013,7 +1005,7 @@ while (YYID (0))
    we won't break user code: when these are the locations we know.  */
 
 #ifndef YY_LOCATION_PRINT
-# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL
+# if YYLTYPE_IS_TRIVIAL
 #  define YY_LOCATION_PRINT(File, Loc)			\
      fprintf (File, "%d.%d-%d.%d",			\
 	      (Loc).first_line, (Loc).first_column,	\
@@ -1555,7 +1547,7 @@ YYLTYPE yylloc;
     YYLTYPE *yylsp;
 
     /* The locations where the error started and ended.  */
-    YYLTYPE yyerror_range[3];
+    YYLTYPE yyerror_range[2];
 
     YYSIZE_T yystacksize;
 
@@ -1602,7 +1594,7 @@ YYLTYPE yylloc;
   yyvsp = yyvs;
   yylsp = yyls;
 
-#if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL
+#if YYLTYPE_IS_TRIVIAL
   /* Initialize the default location before parsing starts.  */
   yylloc.first_line   = yylloc.last_line   = 1;
   yylloc.first_column = yylloc.last_column = 1;
@@ -1610,7 +1602,7 @@ YYLTYPE yylloc;
 
 /* User initialization code.  */
 
-/* Line 1251 of yacc.c  */
+/* Line 1242 of yacc.c  */
 #line 155 "glcpp/glcpp-parse.y"
 {
 	yylloc.first_line = 1;
@@ -1620,8 +1612,8 @@ YYLTYPE yylloc;
 	yylloc.source = 0;
 }
 
-/* Line 1251 of yacc.c  */
-#line 1625 "glcpp/glcpp-parse.c"
+/* Line 1242 of yacc.c  */
+#line 1617 "glcpp/glcpp-parse.c"
   yylsp[0] = yylloc;
 
   goto yysetstate;
@@ -1808,7 +1800,7 @@ yyreduce:
     {
         case 4:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 194 "glcpp/glcpp-parse.y"
     {
 		glcpp_print(parser->output, "\n");
@@ -1817,7 +1809,7 @@ yyreduce:
 
   case 5:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 197 "glcpp/glcpp-parse.y"
     {
 		_glcpp_parser_print_expanded_token_list (parser, (yyvsp[(1) - (1)].token_list));
@@ -1828,7 +1820,7 @@ yyreduce:
 
   case 8:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 207 "glcpp/glcpp-parse.y"
     {
 		_glcpp_parser_skip_stack_push_if (parser, & (yylsp[(1) - (3)]), (yyvsp[(2) - (3)].ival));
@@ -1837,7 +1829,7 @@ yyreduce:
 
   case 9:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 210 "glcpp/glcpp-parse.y"
     {
 		_glcpp_parser_skip_stack_change_if (parser, & (yylsp[(1) - (3)]), "elif", (yyvsp[(2) - (3)].ival));
@@ -1846,7 +1838,7 @@ yyreduce:
 
   case 10:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 216 "glcpp/glcpp-parse.y"
     {
 		_define_object_macro (parser, & (yylsp[(2) - (4)]), (yyvsp[(2) - (4)].str), (yyvsp[(3) - (4)].token_list));
@@ -1855,7 +1847,7 @@ yyreduce:
 
   case 11:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 219 "glcpp/glcpp-parse.y"
     {
 		_define_function_macro (parser, & (yylsp[(2) - (6)]), (yyvsp[(2) - (6)].str), NULL, (yyvsp[(5) - (6)].token_list));
@@ -1864,7 +1856,7 @@ yyreduce:
 
   case 12:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 222 "glcpp/glcpp-parse.y"
     {
 		_define_function_macro (parser, & (yylsp[(2) - (7)]), (yyvsp[(2) - (7)].str), (yyvsp[(4) - (7)].string_list), (yyvsp[(6) - (7)].token_list));
@@ -1873,7 +1865,7 @@ yyreduce:
 
   case 13:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 225 "glcpp/glcpp-parse.y"
     {
 		macro_t *macro = hash_table_find (parser->defines, (yyvsp[(2) - (3)].str));
@@ -1887,7 +1879,7 @@ yyreduce:
 
   case 14:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 233 "glcpp/glcpp-parse.y"
     {
 		/* Be careful to only evaluate the 'if' expression if
@@ -1912,7 +1904,7 @@ yyreduce:
 
   case 15:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 252 "glcpp/glcpp-parse.y"
     {
 		/* #if without an expression is only an error if we
@@ -1928,7 +1920,7 @@ yyreduce:
 
   case 16:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 262 "glcpp/glcpp-parse.y"
     {
 		macro_t *macro = hash_table_find (parser->defines, (yyvsp[(2) - (4)].str));
@@ -1939,7 +1931,7 @@ yyreduce:
 
   case 17:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 267 "glcpp/glcpp-parse.y"
     {
 		macro_t *macro = hash_table_find (parser->defines, (yyvsp[(2) - (4)].str));
@@ -1950,7 +1942,7 @@ yyreduce:
 
   case 18:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 272 "glcpp/glcpp-parse.y"
     {
 		/* Be careful to only evaluate the 'elif' expression
@@ -1975,7 +1967,7 @@ yyreduce:
 
   case 19:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 291 "glcpp/glcpp-parse.y"
     {
 		/* #elif without an expression is an error unless we
@@ -1996,7 +1988,7 @@ yyreduce:
 
   case 20:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 306 "glcpp/glcpp-parse.y"
     {
 		_glcpp_parser_skip_stack_change_if (parser, & (yylsp[(1) - (2)]), "else", 1);
@@ -2005,7 +1997,7 @@ yyreduce:
 
   case 21:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 309 "glcpp/glcpp-parse.y"
     {
 		_glcpp_parser_skip_stack_pop (parser, & (yylsp[(1) - (2)]));
@@ -2014,7 +2006,7 @@ yyreduce:
 
   case 22:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 312 "glcpp/glcpp-parse.y"
     {
 		macro_t *macro = hash_table_find (parser->defines, "__VERSION__");
@@ -2033,7 +2025,7 @@ yyreduce:
 
   case 24:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 329 "glcpp/glcpp-parse.y"
     {
 		if (strlen ((yyvsp[(1) - (1)].str)) >= 3 && strncmp ((yyvsp[(1) - (1)].str), "0x", 2) == 0) {
@@ -2048,7 +2040,7 @@ yyreduce:
 
   case 25:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 338 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (1)].ival);
@@ -2057,7 +2049,7 @@ yyreduce:
 
   case 27:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 344 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) || (yyvsp[(3) - (3)].ival);
@@ -2066,7 +2058,7 @@ yyreduce:
 
   case 28:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 347 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) && (yyvsp[(3) - (3)].ival);
@@ -2075,7 +2067,7 @@ yyreduce:
 
   case 29:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 350 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) | (yyvsp[(3) - (3)].ival);
@@ -2084,7 +2076,7 @@ yyreduce:
 
   case 30:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 353 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) ^ (yyvsp[(3) - (3)].ival);
@@ -2093,7 +2085,7 @@ yyreduce:
 
   case 31:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 356 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) & (yyvsp[(3) - (3)].ival);
@@ -2102,7 +2094,7 @@ yyreduce:
 
   case 32:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 359 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) != (yyvsp[(3) - (3)].ival);
@@ -2111,7 +2103,7 @@ yyreduce:
 
   case 33:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 362 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) == (yyvsp[(3) - (3)].ival);
@@ -2120,7 +2112,7 @@ yyreduce:
 
   case 34:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 365 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) >= (yyvsp[(3) - (3)].ival);
@@ -2129,7 +2121,7 @@ yyreduce:
 
   case 35:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 368 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) <= (yyvsp[(3) - (3)].ival);
@@ -2138,7 +2130,7 @@ yyreduce:
 
   case 36:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 371 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) > (yyvsp[(3) - (3)].ival);
@@ -2147,7 +2139,7 @@ yyreduce:
 
   case 37:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 374 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) < (yyvsp[(3) - (3)].ival);
@@ -2156,7 +2148,7 @@ yyreduce:
 
   case 38:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 377 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) >> (yyvsp[(3) - (3)].ival);
@@ -2165,7 +2157,7 @@ yyreduce:
 
   case 39:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 380 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) << (yyvsp[(3) - (3)].ival);
@@ -2174,7 +2166,7 @@ yyreduce:
 
   case 40:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 383 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) - (yyvsp[(3) - (3)].ival);
@@ -2183,7 +2175,7 @@ yyreduce:
 
   case 41:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 386 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) + (yyvsp[(3) - (3)].ival);
@@ -2192,7 +2184,7 @@ yyreduce:
 
   case 42:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 389 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) % (yyvsp[(3) - (3)].ival);
@@ -2201,7 +2193,7 @@ yyreduce:
 
   case 43:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 392 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) / (yyvsp[(3) - (3)].ival);
@@ -2210,7 +2202,7 @@ yyreduce:
 
   case 44:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 395 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(1) - (3)].ival) * (yyvsp[(3) - (3)].ival);
@@ -2219,7 +2211,7 @@ yyreduce:
 
   case 45:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 398 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = ! (yyvsp[(2) - (2)].ival);
@@ -2228,7 +2220,7 @@ yyreduce:
 
   case 46:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 401 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = ~ (yyvsp[(2) - (2)].ival);
@@ -2237,7 +2229,7 @@ yyreduce:
 
   case 47:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 404 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = - (yyvsp[(2) - (2)].ival);
@@ -2246,7 +2238,7 @@ yyreduce:
 
   case 48:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 407 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = + (yyvsp[(2) - (2)].ival);
@@ -2255,7 +2247,7 @@ yyreduce:
 
   case 49:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 410 "glcpp/glcpp-parse.y"
     {
 		(yyval.ival) = (yyvsp[(2) - (3)].ival);
@@ -2264,7 +2256,7 @@ yyreduce:
 
   case 50:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 416 "glcpp/glcpp-parse.y"
     {
 		(yyval.string_list) = _string_list_create (parser);
@@ -2275,7 +2267,7 @@ yyreduce:
 
   case 51:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 421 "glcpp/glcpp-parse.y"
     {
 		(yyval.string_list) = (yyvsp[(1) - (3)].string_list);	
@@ -2286,14 +2278,14 @@ yyreduce:
 
   case 52:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 429 "glcpp/glcpp-parse.y"
     { (yyval.token_list) = NULL; ;}
     break;
 
   case 54:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 434 "glcpp/glcpp-parse.y"
     {
 		yyerror (& (yylsp[(1) - (2)]), parser, "Invalid tokens after #");
@@ -2302,14 +2294,14 @@ yyreduce:
 
   case 55:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 440 "glcpp/glcpp-parse.y"
     { (yyval.token_list) = NULL; ;}
     break;
 
   case 58:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 446 "glcpp/glcpp-parse.y"
     {
 		glcpp_warning(&(yylsp[(1) - (1)]), parser, "extra tokens at end of directive");
@@ -2318,7 +2310,7 @@ yyreduce:
 
   case 59:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 453 "glcpp/glcpp-parse.y"
     {
 		int v = hash_table_find (parser->defines, (yyvsp[(2) - (2)].str)) ? 1 : 0;
@@ -2328,7 +2320,7 @@ yyreduce:
 
   case 60:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 457 "glcpp/glcpp-parse.y"
     {
 		int v = hash_table_find (parser->defines, (yyvsp[(3) - (4)].str)) ? 1 : 0;
@@ -2338,7 +2330,7 @@ yyreduce:
 
   case 62:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 466 "glcpp/glcpp-parse.y"
     {
 		parser->space_tokens = 1;
@@ -2350,7 +2342,7 @@ yyreduce:
 
   case 63:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 472 "glcpp/glcpp-parse.y"
     {
 		(yyval.token_list) = (yyvsp[(1) - (2)].token_list);
@@ -2361,7 +2353,7 @@ yyreduce:
 
   case 64:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 480 "glcpp/glcpp-parse.y"
     {
 		parser->space_tokens = 1;
@@ -2373,7 +2365,7 @@ yyreduce:
 
   case 65:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 486 "glcpp/glcpp-parse.y"
     {
 		(yyval.token_list) = (yyvsp[(1) - (2)].token_list);
@@ -2384,7 +2376,7 @@ yyreduce:
 
   case 66:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 494 "glcpp/glcpp-parse.y"
     {
 		(yyval.token) = _token_create_str (parser, IDENTIFIER, (yyvsp[(1) - (1)].str));
@@ -2394,7 +2386,7 @@ yyreduce:
 
   case 67:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 498 "glcpp/glcpp-parse.y"
     {
 		(yyval.token) = _token_create_str (parser, INTEGER_STRING, (yyvsp[(1) - (1)].str));
@@ -2404,7 +2396,7 @@ yyreduce:
 
   case 68:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 502 "glcpp/glcpp-parse.y"
     {
 		(yyval.token) = _token_create_ival (parser, (yyvsp[(1) - (1)].ival), (yyvsp[(1) - (1)].ival));
@@ -2414,7 +2406,7 @@ yyreduce:
 
   case 69:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 506 "glcpp/glcpp-parse.y"
     {
 		(yyval.token) = _token_create_str (parser, OTHER, (yyvsp[(1) - (1)].str));
@@ -2424,7 +2416,7 @@ yyreduce:
 
   case 70:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 510 "glcpp/glcpp-parse.y"
     {
 		(yyval.token) = _token_create_ival (parser, SPACE, SPACE);
@@ -2434,225 +2426,225 @@ yyreduce:
 
   case 71:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 517 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '['; ;}
     break;
 
   case 72:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 518 "glcpp/glcpp-parse.y"
     { (yyval.ival) = ']'; ;}
     break;
 
   case 73:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 519 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '('; ;}
     break;
 
   case 74:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 520 "glcpp/glcpp-parse.y"
     { (yyval.ival) = ')'; ;}
     break;
 
   case 75:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 521 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '{'; ;}
     break;
 
   case 76:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 522 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '}'; ;}
     break;
 
   case 77:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 523 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '.'; ;}
     break;
 
   case 78:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 524 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '&'; ;}
     break;
 
   case 79:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 525 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '*'; ;}
     break;
 
   case 80:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 526 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '+'; ;}
     break;
 
   case 81:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 527 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '-'; ;}
     break;
 
   case 82:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 528 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '~'; ;}
     break;
 
   case 83:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 529 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '!'; ;}
     break;
 
   case 84:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 530 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '/'; ;}
     break;
 
   case 85:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 531 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '%'; ;}
     break;
 
   case 86:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 532 "glcpp/glcpp-parse.y"
     { (yyval.ival) = LEFT_SHIFT; ;}
     break;
 
   case 87:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 533 "glcpp/glcpp-parse.y"
     { (yyval.ival) = RIGHT_SHIFT; ;}
     break;
 
   case 88:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 534 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '<'; ;}
     break;
 
   case 89:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 535 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '>'; ;}
     break;
 
   case 90:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 536 "glcpp/glcpp-parse.y"
     { (yyval.ival) = LESS_OR_EQUAL; ;}
     break;
 
   case 91:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 537 "glcpp/glcpp-parse.y"
     { (yyval.ival) = GREATER_OR_EQUAL; ;}
     break;
 
   case 92:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 538 "glcpp/glcpp-parse.y"
     { (yyval.ival) = EQUAL; ;}
     break;
 
   case 93:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 539 "glcpp/glcpp-parse.y"
     { (yyval.ival) = NOT_EQUAL; ;}
     break;
 
   case 94:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 540 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '^'; ;}
     break;
 
   case 95:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 541 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '|'; ;}
     break;
 
   case 96:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 542 "glcpp/glcpp-parse.y"
     { (yyval.ival) = AND; ;}
     break;
 
   case 97:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 543 "glcpp/glcpp-parse.y"
     { (yyval.ival) = OR; ;}
     break;
 
   case 98:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 544 "glcpp/glcpp-parse.y"
     { (yyval.ival) = ';'; ;}
     break;
 
   case 99:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 545 "glcpp/glcpp-parse.y"
     { (yyval.ival) = ','; ;}
     break;
 
   case 100:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 546 "glcpp/glcpp-parse.y"
     { (yyval.ival) = '='; ;}
     break;
 
   case 101:
 
-/* Line 1464 of yacc.c  */
+/* Line 1455 of yacc.c  */
 #line 547 "glcpp/glcpp-parse.y"
     { (yyval.ival) = PASTE; ;}
     break;
 
 
-/* Line 1464 of yacc.c  */
-#line 2656 "glcpp/glcpp-parse.c"
+/* Line 1455 of yacc.c  */
+#line 2648 "glcpp/glcpp-parse.c"
       default: break;
     }
   YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
@@ -2724,7 +2716,7 @@ yyerrlab:
 #endif
     }
 
-  yyerror_range[1] = yylloc;
+  yyerror_range[0] = yylloc;
 
   if (yyerrstatus == 3)
     {
@@ -2761,7 +2753,7 @@ yyerrorlab:
   if (/*CONSTCOND*/ 0)
      goto yyerrorlab;
 
-  yyerror_range[1] = yylsp[1-yylen];
+  yyerror_range[0] = yylsp[1-yylen];
   /* Do not reclaim the symbols of the rule which action triggered
      this YYERROR.  */
   YYPOPSTACK (yylen);
@@ -2795,7 +2787,7 @@ yyerrlab1:
       if (yyssp == yyss)
 	YYABORT;
 
-      yyerror_range[1] = *yylsp;
+      yyerror_range[0] = *yylsp;
       yydestruct ("Error: popping",
 		  yystos[yystate], yyvsp, yylsp, parser);
       YYPOPSTACK (1);
@@ -2805,10 +2797,10 @@ yyerrlab1:
 
   *++yyvsp = yylval;
 
-  yyerror_range[2] = yylloc;
+  yyerror_range[1] = yylloc;
   /* Using YYLLOC is tempting, but would change the location of
      the lookahead.  YYLOC is available though.  */
-  YYLLOC_DEFAULT (yyloc, yyerror_range, 2);
+  YYLLOC_DEFAULT (yyloc, (yyerror_range - 1), 2);
   *++yylsp = yyloc;
 
   /* Shift the error token.  */
@@ -2870,7 +2862,7 @@ yyreturn:
 
 
-/* Line 1684 of yacc.c  */
+/* Line 1675 of yacc.c  */
 #line 550 "glcpp/glcpp-parse.y"
 
 
diff --git a/src/glsl/glcpp/glcpp-parse.h b/src/glsl/glcpp/glcpp-parse.h
index 40556854f38..50758930e9c 100644
--- a/src/glsl/glcpp/glcpp-parse.h
+++ b/src/glsl/glcpp/glcpp-parse.h
@@ -1,9 +1,10 @@
-/* A Bison parser, made by GNU Bison 2.4.3.  */
+
+/* A Bison parser, made by GNU Bison 2.4.1.  */
 
 /* Skeleton interface for Bison's Yacc-like parsers in C
    
-      Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-   2009, 2010 Free Software Foundation, Inc.
+      Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
+   Free Software Foundation, Inc.
    
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
-- 
cgit v1.2.3


From cd114a92b996c246bb35080bca611fca3f375e94 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Tue, 5 Oct 2010 15:18:29 +0200
Subject: vl: change the xvmc state_tracker to the new gallium API

---
 src/gallium/state_trackers/xorg/xvmc/subpicture.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index e0c9e303817..4f6c80d4bee 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -211,18 +211,27 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh
 {
    XvMCSubpicturePrivate *subpicture_priv;
    XvMCContextPrivate *context_priv;
+   unsigned int tmp_color;
+   float color_f[4];
 
    assert(dpy);
 
    if (!subpicture)
       return XvMCBadSubpicture;
+	  	
+   /* Convert color to */
+   util_format_read_4f(PIPE_FORMAT_B8G8R8A8_UNORM,
+                    color_f, 1,
+                    &color, 4,
+                    0, 0, 1, 1);
 
    subpicture_priv = subpicture->privData;
    context_priv = subpicture_priv->context->privData;
    /* TODO: Assert clear rect is within bounds? Or clip? */
-   context_priv->vctx->vpipe->surface_fill(context_priv->vctx->vpipe,
+   context_priv->vctx->vpipe->clear_render_target(context_priv->vctx->vpipe,
                                            subpicture_priv->sfc, x, y,
-                                           width, height, color);
+										   color_f,
+                                           width, height);
 
    return Success;
 }
-- 
cgit v1.2.3


From d0e203f1f00b0f760acc7fab07cd7ce8cca34000 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Wed, 6 Oct 2010 00:19:53 +0200
Subject: vl: initial commit of the bitstream parser

---
 src/gallium/state_trackers/vdpau/decode.c          | 14 +++-
 .../state_trackers/vdpau/mpeg2_bitstream_parser.c  | 83 ++++++++++++++++++++--
 .../state_trackers/vdpau/mpeg2_bitstream_parser.h  | 21 +++++-
 src/gallium/state_trackers/xorg/xvmc/subpicture.c  |  2 +-
 4 files changed, 108 insertions(+), 12 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 3e7cb4a3cab..03764a7f33d 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -211,11 +211,19 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 	
 	ret = vlVdpCreateSurfaceTarget(vldecoder,t_vdp_surf);
 
-	vlVdpBitstreamToMacroblock(vpipe->screen, bitstream_buffers,
-                     &num_macroblocks, &pipe_macroblocks);
+	if (vlVdpMPEG2BitstreamToMacroblock(vpipe->screen, bitstream_buffers, bitstream_buffer_count,
+                     &num_macroblocks, &pipe_macroblocks))
+					 {
+						 debug_printf("[VDPAU] Error in frame-header. Skipping.\n");
+						 
+						 ret = VDP_STATUS_OK;
+						 goto skip_frame;
+					 }
 		
 	vpipe->set_decode_target(vpipe,t_surf);
-	vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, pipe_macroblocks, NULL);
+	vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, (struct pipe_macroblock *)pipe_macroblocks, NULL);
+	
+	skip_frame:
 	return ret;
 }
 
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
index 39019660edd..d88afb495f7 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
@@ -27,18 +27,89 @@
 
 #include "mpeg2_bitstream_parser.h"
 
-void
-vlVdpBitstreamToMacroblock (
+int
+vlVdpMPEG2NextStartCode(struct vdpMPEG2BitstreamParser *parser)
+{
+	uint32_t integer = 0;
+	uint32_t bytes_to_end;
+	
+	/* Move cursor to the start of a byte */
+	while(parser->cursor % 8)
+		parser->cursor++;
+		
+	bytes_to_end = parser->cur_bitstream_length - parser->cursor/8 - 1;
+		
+	/* Read byte after byte, until startcode is found */
+	while(integer != 0x00000100)
+	{
+		if (bytes_to_end < 0)
+		{
+			parser->state = MPEG2_HEADER_DONE;
+			return 1;
+		}
+		
+		integer << 8;
+		integer = integer & (unsigned char)(parser->ptr_bitstream + parser->cursor/8)[0];
+	
+		bytes_to_end--;
+		parser->cursor += 8;
+		
+	}
+	
+	/* start_code found. rewind cursor a byte */
+	parser->cursor -= 8;
+	
+	return 0;
+}
+
+int
+vlVdpMPEG2BitstreamToMacroblock (
 		  struct pipe_screen *screen,
 		  VdpBitstreamBuffer const *bitstream_buffers,
+		  uint32_t bitstream_buffer_count,
           unsigned int *num_macroblocks,
           struct pipe_mpeg12_macroblock **pipe_macroblocks)
 {
-	debug_printf("[VDPAU] BitstreamToMacroblock not implemented yet");
-	assert(0);
-
+	bool b_header_done = false;
+	struct vdpMPEG2BitstreamParser parser;
+	
+	num_macroblocks[0] = 0;
+	
+	memset(&parser,0,sizeof(parser));
+	parser.state = MPEG2_HEADER_START_CODE;
+	parser.cur_bitstream_length = bitstream_buffers[0].bitstream_bytes;
+	parser.ptr_bitstream = (unsigned char *)bitstream_buffers[0].bitstream;
+	
+	/* Main header parser loop */
+	while(!b_header_done)
+	{
+		switch (parser.state)
+		{
+		case MPEG2_HEADER_START_CODE:
+			if (vlVdpMPEG2NextStartCode(&parser))
+				continue;
+			
+			/* Start_code found */
+			switch ((parser.ptr_bitstream + parser.cursor/8)[0])
+			{
+				/* sequence_header_code */
+				case 0xB3:
+				debug_printf("[VDPAU][Bitstream parser] Sequence header code found at cursor pos: %d\n", parser.cursor);
+				exit(1);
+				break;
+			}
+		
+		break;
+		case MPEG2_HEADER_DONE:
+			debug_printf("[VDPAU][Bitstream parser] Done parsing current header\n");
+		break;
+		
+		}
+		
+		
+	}
 	
 
-	return;
+	return 0;
 }
 
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
index 534503df53f..74a216a4d81 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
@@ -32,10 +32,27 @@
 #include <pipe/p_video_state.h>
 #include "vdpau_private.h"
 
-void
-vlVdpBitstreamToMacroblock(struct pipe_screen *screen,
+enum vdpMPEG2States
+{
+	MPEG2_HEADER_START_CODE,
+	MPEG2_HEADER_DONE
+};
+
+struct vdpMPEG2BitstreamParser
+{
+	enum vdpMPEG2States state;
+	uint32_t cursor;                // current bit cursor
+	uint32_t cur_bitstream;
+	uint32_t cur_bitstream_length;
+	unsigned char *ptr_bitstream;
+};
+
+int
+vlVdpMPEG2BitstreamToMacroblock(struct pipe_screen *screen,
                   VdpBitstreamBuffer const *bitstream_buffers,
+				  uint32_t bitstream_buffer_count,
                   unsigned int *num_macroblocks,
                   struct pipe_mpeg12_macroblock **pipe_macroblocks);
+				  
 
 #endif // MPEG2_BITSTREAM_PARSER_H
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 4f6c80d4bee..7e82cd17288 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -219,7 +219,7 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh
    if (!subpicture)
       return XvMCBadSubpicture;
 	  	
-   /* Convert color to */
+   /* Convert color to float */
    util_format_read_4f(PIPE_FORMAT_B8G8R8A8_UNORM,
                     color_f, 1,
                     &color, 4,
-- 
cgit v1.2.3


From 65fe0866aec7b5608419f6d184cb1fa4fe1dc45a Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Wed, 6 Oct 2010 23:30:08 +0200
Subject: vl: implemented a few functions and made stubs to get mplayer running

---
 src/gallium/auxiliary/vl/vl_compositor.c           |   1 +
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c   |   1 +
 src/gallium/state_trackers/vdpau/Makefile          |   4 +-
 src/gallium/state_trackers/vdpau/decode.c          |  26 +++-
 src/gallium/state_trackers/vdpau/device.c          |  45 ++++++-
 src/gallium/state_trackers/vdpau/ftab.c            |  60 ++++-----
 src/gallium/state_trackers/vdpau/header.c          |   0
 src/gallium/state_trackers/vdpau/mixer.c           | 135 +++++++++++++++++++++
 .../state_trackers/vdpau/mpeg2_bitstream_parser.c  |   2 +
 src/gallium/state_trackers/vdpau/output.c          |  23 +++-
 src/gallium/state_trackers/vdpau/preemption.c      |  39 ++++++
 src/gallium/state_trackers/vdpau/presentation.c    |  34 +++++-
 src/gallium/state_trackers/vdpau/render.c          |   0
 src/gallium/state_trackers/vdpau/vdpau_private.h   |  61 ++++++++++
 14 files changed, 387 insertions(+), 44 deletions(-)
 delete mode 100644 src/gallium/state_trackers/vdpau/header.c
 delete mode 100644 src/gallium/state_trackers/vdpau/render.c

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index ee7bf070037..1dbf14ee7b9 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -31,6 +31,7 @@
 #include <util/u_inlines.h>
 #include <util/u_memory.h>
 #include <util/u_keymap.h>
+#include <util/u_draw.h>
 #include <util/u_sampler.h>
 #include <tgsi/tgsi_ureg.h>
 #include "vl_csc.h"
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 8a8c155e8ec..264ab3d4566 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -34,6 +34,7 @@
 #include <util/u_memory.h>
 #include <util/u_keymap.h>
 #include <util/u_sampler.h>
+#include <util/u_draw.h>
 #include <tgsi/tgsi_ureg.h>
 
 #define DEFAULT_BUF_ALIGNMENT 1
diff --git a/src/gallium/state_trackers/vdpau/Makefile b/src/gallium/state_trackers/vdpau/Makefile
index ad37676b95e..0e68d4fe007 100644
--- a/src/gallium/state_trackers/vdpau/Makefile
+++ b/src/gallium/state_trackers/vdpau/Makefile
@@ -20,7 +20,9 @@ C_SOURCES = htab.c \
 	    presentation.c \
 	    bitmap.c \
 	    mpeg2_bitstream_parser.c \
-	    output.c
+	    output.c \
+	    preemption.c \
+	    mixer.c
 
 
 include ../../Makefile.template
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 03764a7f33d..f6003304668 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -98,6 +98,7 @@ VdpStatus
 vlVdpDecoderDestroy  (VdpDecoder decoder
 )
 {
+	debug_printf("[VDPAU] Destroying decoder\n");
 	vlVdpDecoder *vldecoder;
 	
 	vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
@@ -105,8 +106,11 @@ vlVdpDecoderDestroy  (VdpDecoder decoder
       return VDP_STATUS_INVALID_HANDLE;
 	}
 	
-	if (vldecoder->vctx->vscreen)
-		vl_screen_destroy(vldecoder->vctx->vscreen);
+	if (vldecoder->vctx)
+	{
+		if (vldecoder->vctx->vscreen)
+			vl_screen_destroy(vldecoder->vctx->vscreen);
+	}
 	
 	if (vldecoder->vctx)
 		vl_video_destroy(vldecoder->vctx);
@@ -124,6 +128,8 @@ vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
 	struct pipe_resource tmplt;
 	struct pipe_resource *surf_tex;
 	struct pipe_video_context *vpipe;
+	
+	debug_printf("[VDPAU] Creating surface\n");
 		
 	if(!(vldecoder && vlsurf))
 		return VDP_STATUS_INVALID_POINTER;
@@ -185,6 +191,7 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 	struct pipe_mpeg12_macroblock *pipe_macroblocks;
 	VdpStatus ret;
 	
+	debug_printf("[VDPAU] Decoding MPEG2\n");
 
 	vpipe = vldecoder->vctx->vpipe;
 	t_vdp_surf = vlsurf;
@@ -221,7 +228,7 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 					 }
 		
 	vpipe->set_decode_target(vpipe,t_surf);
-	vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, (struct pipe_macroblock *)pipe_macroblocks, NULL);
+	//vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, (struct pipe_macroblock *)pipe_macroblocks, NULL);
 	
 	skip_frame:
 	return ret;
@@ -283,3 +290,16 @@ vlVdpDecoderRender (VdpDecoder decoder,
 
 	return ret;
 }
+
+VdpStatus 
+vlVdpGenerateCSCMatrix(
+	VdpProcamp *procamp, 
+	VdpColorStandard standard,
+	VdpCSCMatrix *csc_matrix)
+{
+	debug_printf("[VDPAU] Generating CSCMatrix\n");
+	/* Stub: csc_matrix is not written yet. procamp is optional in the VDPAU API, so only the output is required */
+	if (!csc_matrix)
+		return VDP_STATUS_INVALID_POINTER;
+	return VDP_STATUS_OK;
+}
\ No newline at end of file
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index d370d1c6610..4ca198e874d 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2010 Younes Manton.
+ * Copyright 2010 Younes Manton & Thomas Balling Sørensen.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,21 +25,18 @@
  *
  **************************************************************************/
 
-#include <vdpau/vdpau_x11.h>
 #include <pipe/p_compiler.h>
 #include <vl_winsys.h>
 #include <util/u_memory.h>
 #include <util/u_debug.h>
 #include "vdpau_private.h"
 
-VdpDeviceCreateX11 vdp_imp_device_create_x11;
 
 PUBLIC VdpStatus
 vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGetProcAddress **get_proc_address)
 {
    VdpStatus    ret;
    vlVdpDevice *dev = NULL;
-   struct vl_screen *vlscreen = NULL;
 
    if (!(display && device && get_proc_address))
       return VDP_STATUS_INVALID_POINTER;
@@ -62,9 +59,8 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGe
       ret = VDP_STATUS_ERROR;
       goto no_handle;
    }
-
+	
    *get_proc_address = &vlVdpGetProcAddress;
-   
    debug_printf("[VDPAU] Device created succesfully\n");
 
    return VDP_STATUS_OK;
@@ -77,9 +73,46 @@ no_htab:
    return ret;
 }
 
+/* Create a presentation queue target binding an X11 drawable to a device; the handle is returned in *target. */
+PUBLIC VdpStatus 
+vlVdpPresentationQueueTargetCreateX11(VdpDevice device, Drawable drawable,VdpPresentationQueueTarget *target)
+{
+   vlVdpPresentationQueueTarget *pqt = NULL;
+   vlVdpDevice *dev;
+
+   debug_printf("[VDPAU] Creating PresentationQueueTarget\n");
+
+   if (!target)
+      return VDP_STATUS_INVALID_POINTER;
+   if (!drawable)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   dev = vlGetDataHTAB(device);
+   if (!dev)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   pqt = CALLOC(1, sizeof(vlVdpPresentationQueueTarget));
+   if (!pqt)
+      return VDP_STATUS_RESOURCES;
+
+   pqt->device = dev;
+   pqt->drawable = drawable;
+
+   *target = vlAddDataHTAB(pqt);
+   if (*target == 0) {
+      /* Release only the new target; dev is still referenced by its own handle */
+      FREE(pqt);
+      return VDP_STATUS_ERROR;
+   }
+
+   return VDP_STATUS_OK;
+}
+
 VdpStatus 
 vlVdpDeviceDestroy(VdpDevice device)
 {
+   debug_printf("[VDPAU] Destroying destroy\n");
+	
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
diff --git a/src/gallium/state_trackers/vdpau/ftab.c b/src/gallium/state_trackers/vdpau/ftab.c
index 1842c4da0ea..2142dcd4f6a 100644
--- a/src/gallium/state_trackers/vdpau/ftab.c
+++ b/src/gallium/state_trackers/vdpau/ftab.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2010 Younes Manton.
+ * Copyright 2010 Younes Manton & Thomas Balling Sørensen.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -33,10 +33,10 @@ static void* ftab[67] =
    &vlVdpGetErrorString, /* VDP_FUNC_ID_GET_ERROR_STRING */
    &vlVdpGetProcAddress, /* VDP_FUNC_ID_GET_PROC_ADDRESS */
    &vlVdpGetApiVersion, /* VDP_FUNC_ID_GET_API_VERSION */
-   0,
+   0x555,					/* DUMMY */
    &vlVdpGetInformationString, /* VDP_FUNC_ID_GET_INFORMATION_STRING */
    &vlVdpDeviceDestroy, /* VDP_FUNC_ID_DEVICE_DESTROY */
-   0, /* VDP_FUNC_ID_GENERATE_CSC_MATRIX */
+   &vlVdpGenerateCSCMatrix, /* VDP_FUNC_ID_GENERATE_CSC_MATRIX */
    &vlVdpVideoSurfaceQueryCapabilities, /* VDP_FUNC_ID_VIDEO_SURFACE_QUERY_CAPABILITIES */
    &vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities, /* VDP_FUNC_ID_VIDEO_SURFACE_QUERY_GET_PUT_BITS_Y_CB_CR_CAPABILITIES */
    &vlVdpVideoSurfaceCreate, /* VDP_FUNC_ID_VIDEO_SURFACE_CREATE */
@@ -46,62 +46,62 @@ static void* ftab[67] =
    &vlVdpVideoSurfacePutBitsYCbCr, /* VDP_FUNC_ID_VIDEO_SURFACE_PUT_BITS_Y_CB_CR */
    &vlVdpOutputSurfaceQueryCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_CAPABILITIES */
    &vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_GET_PUT_BITS_NATIVE_CAPABILITIES */
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_PUT_BITS_INDEXED_CAPABILITIES */
+   0x2, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_PUT_BITS_INDEXED_CAPABILITIES */
    &vlVdpOutputSurfaceQueryPutBitsYCbCrCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_PUT_BITS_Y_CB_CR_CAPABILITIES */
    &vlVdpOutputSurfaceCreate, /* VDP_FUNC_ID_OUTPUT_SURFACE_CREATE */
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_DESTROY */
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_PARAMETERS */
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_BITS_NATIVE */
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_NATIVE */
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_INDEXED */
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_Y_CB_CR */
+   0x3, /* VDP_FUNC_ID_OUTPUT_SURFACE_DESTROY */
+   0x4, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_PARAMETERS */
+   0x5, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_BITS_NATIVE */
+   0x6, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_NATIVE */
+   0x7, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_INDEXED */
+   0x8, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_Y_CB_CR */
    &vlVdpBitmapSurfaceQueryCapabilities, /* VDP_FUNC_ID_BITMAP_SURFACE_QUERY_CAPABILITIES */
    &vlVdpBitmapSurfaceCreate, /* VDP_FUNC_ID_BITMAP_SURFACE_CREATE */
    &vlVdpBitmapSurfaceDestroy, /* VDP_FUNC_ID_BITMAP_SURFACE_DESTROY */
    &vlVdpBitmapSurfaceGetParameters, /* VDP_FUNC_ID_BITMAP_SURFACE_GET_PARAMETERS */
    &vlVdpBitmapSurfacePutBitsNative, /* VDP_FUNC_ID_BITMAP_SURFACE_PUT_BITS_NATIVE */
-   0,
-   0,
-   0,
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_OUTPUT_SURFACE */
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_BITMAP_SURFACE */
-   0, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_VIDEO_SURFACE_LUMA */
+   0x55,	/* DUMMY */
+   0x55,	/* DUMMY */
+   0x55,	/* DUMMY */
+   0x9, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_OUTPUT_SURFACE */
+   0x10, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_BITMAP_SURFACE */
+   0x11, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_VIDEO_SURFACE_LUMA */
    &vlVdpDecoderQueryCapabilities, /* VDP_FUNC_ID_DECODER_QUERY_CAPABILITIES */
    &vlVdpDecoderCreate, /* VDP_FUNC_ID_DECODER_CREATE */
    &vlVdpDecoderDestroy, /* VDP_FUNC_ID_DECODER_DESTROY */
-   0, /* VDP_FUNC_ID_DECODER_GET_PARAMETERS */
+   0x12, /* VDP_FUNC_ID_DECODER_GET_PARAMETERS */
    &vlVdpDecoderRender, /* VDP_FUNC_ID_DECODER_RENDER */
    &vlVdpVideoMixerQueryFeatureSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_FEATURE_SUPPORT */
    &vlVdpVideoMixerQueryParameterSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_PARAMETER_SUPPORT */
    &vlVdpVideoMixerQueryAttributeSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_ATTRIBUTE_SUPPORT */
    &vlVdpVideoMixerQueryParameterValueRange, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_PARAMETER_VALUE_RANGE */
    &vlVdpVideoMixerQueryAttributeValueRange, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_ATTRIBUTE_VALUE_RANGE */
-   0, /* VDP_FUNC_ID_VIDEO_MIXER_CREATE */
-   0, /* VDP_FUNC_ID_VIDEO_MIXER_SET_FEATURE_ENABLES */
-   0, /* VDP_FUNC_ID_VIDEO_MIXER_SET_ATTRIBUTE_VALUES */
-   0, /* VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_SUPPORT */
-   0, /* VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_ENABLES */
-   0, /* VDP_FUNC_ID_VIDEO_MIXER_GET_PARAMETER_VALUES */
-   0, /* VDP_FUNC_ID_VIDEO_MIXER_GET_ATTRIBUTE_VALUES */
-   0, /* VDP_FUNC_ID_VIDEO_MIXER_DESTROY */
-   0, /* VDP_FUNC_ID_VIDEO_MIXER_RENDER */
+   &vlVdpVideoMixerCreate, /* VDP_FUNC_ID_VIDEO_MIXER_CREATE */
+   &vlVdpVideoMixerSetFeatureEnables, /* VDP_FUNC_ID_VIDEO_MIXER_SET_FEATURE_ENABLES */
+   &vlVdpVideoMixerSetAttributeValues, /* VDP_FUNC_ID_VIDEO_MIXER_SET_ATTRIBUTE_VALUES */
+   0x16, /* VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_SUPPORT */
+   0x17, /* VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_ENABLES */
+   0x18, /* VDP_FUNC_ID_VIDEO_MIXER_GET_PARAMETER_VALUES */
+   0x19, /* VDP_FUNC_ID_VIDEO_MIXER_GET_ATTRIBUTE_VALUES */
+   0x20, /* VDP_FUNC_ID_VIDEO_MIXER_DESTROY */
+   &vlVdpVideoMixerRender, /* VDP_FUNC_ID_VIDEO_MIXER_RENDER */
    &vlVdpPresentationQueueTargetDestroy, /* VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_DESTROY */
    &vlVdpPresentationQueueCreate, /* VDP_FUNC_ID_PRESENTATION_QUEUE_CREATE */
    &vlVdpPresentationQueueDestroy, /* VDP_FUNC_ID_PRESENTATION_QUEUE_DESTROY */
    &vlVdpPresentationQueueSetBackgroundColor, /* VDP_FUNC_ID_PRESENTATION_QUEUE_SET_BACKGROUND_COLOR */
    &vlVdpPresentationQueueGetBackgroundColor, /* VDP_FUNC_ID_PRESENTATION_QUEUE_GET_BACKGROUND_COLOR */
-   0,
-   0,
+   0x55,	/* DUMMY */
+   0x55,	/* DUMMY */
    &vlVdpPresentationQueueGetTime, /* VDP_FUNC_ID_PRESENTATION_QUEUE_GET_TIME */
    &vlVdpPresentationQueueDisplay, /* VDP_FUNC_ID_PRESENTATION_QUEUE_DISPLAY */
    &vlVdpPresentationQueueBlockUntilSurfaceIdle, /* VDP_FUNC_ID_PRESENTATION_QUEUE_BLOCK_UNTIL_SURFACE_IDLE */
    &vlVdpPresentationQueueQuerySurfaceStatus, /* VDP_FUNC_ID_PRESENTATION_QUEUE_QUERY_SURFACE_STATUS */
-   0  /* VDP_FUNC_ID_PREEMPTION_CALLBACK_REGISTER */
+   &vlVdpPreemptionCallbackRegister  /* VDP_FUNC_ID_PREEMPTION_CALLBACK_REGISTER */
 };
 
 static void* ftab_winsys[1] =
 {
-   0  /* VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_CREATE_X11 */
+   &vlVdpPresentationQueueTargetCreateX11  /* VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_CREATE_X11 */
 };
 
 boolean vlGetFuncFTAB(VdpFuncId function_id, void **func)
diff --git a/src/gallium/state_trackers/vdpau/header.c b/src/gallium/state_trackers/vdpau/header.c
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index e69de29bb2d..8bf42f53ff2 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -0,0 +1,135 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ 
+ #include <vdpau/vdpau.h>
+ #include <util/u_memory.h>
+  #include <util/u_debug.h>
+  #include "vdpau_private.h"
+
+ 
+ /* Create a video mixer bound to device and return its handle in *mixer. */
+ VdpStatus 	
+ vlVdpVideoMixerCreate (VdpDevice device, 
+						uint32_t feature_count, 
+						VdpVideoMixerFeature const *features, 
+						uint32_t parameter_count, 
+						VdpVideoMixerParameter const *parameters, 
+						void const *const *parameter_values, 
+						VdpVideoMixer *mixer)
+{
+	vlVdpVideoMixer *vmixer = NULL;
+	vlVdpDevice *dev;
+	
+	debug_printf("[VDPAU] Creating VideoMixer\n");
+	
+	/* The new handle is written through mixer, so it must be valid */
+	if (!mixer)
+		return VDP_STATUS_INVALID_POINTER;
+	
+	dev = vlGetDataHTAB(device);
+	if (!dev)
+		return VDP_STATUS_INVALID_HANDLE;
+	
+	vmixer = CALLOC(1, sizeof(vlVdpVideoMixer));
+	if (!vmixer)
+		return VDP_STATUS_RESOURCES;
+	
+	vmixer->device = dev;
+	/* TODO: Handle features and parameters */
+	
+	*mixer = vlAddDataHTAB(vmixer);
+	if (*mixer == 0) {
+		/* No handle refers to vmixer, so release it here to avoid a leak */
+		FREE(vmixer);
+		return VDP_STATUS_ERROR;
+	}
+	return VDP_STATUS_OK;
+}
+
+VdpStatus
+vlVdpVideoMixerSetFeatureEnables (
+			VdpVideoMixer mixer, 
+			uint32_t feature_count, 
+			VdpVideoMixerFeature const *features, 
+			VdpBool const *feature_enables)
+{
+	debug_printf("[VDPAU] Setting VideoMixer features\n");
+	
+	if (!(features && feature_enables))	
+		return VDP_STATUS_INVALID_POINTER;
+	
+	vlVdpVideoMixer *vmixer = vlGetDataHTAB(mixer);
+	if (!vmixer)
+		return VDP_STATUS_INVALID_HANDLE;
+		
+	/*
+	   * TODO: Set features
+	   * */
+	
+	
+	return VDP_STATUS_OK;
+}
+
+VdpStatus vlVdpVideoMixerRender (
+		VdpVideoMixer mixer, 
+		VdpOutputSurface background_surface, 
+		VdpRect const *background_source_rect, 
+		VdpVideoMixerPictureStructure current_picture_structure, 
+		uint32_t video_surface_past_count, 
+		VdpVideoSurface const *video_surface_past, 
+		VdpVideoSurface video_surface_current, 
+		uint32_t video_surface_future_count, 
+		VdpVideoSurface const *video_surface_future, 
+		VdpRect const *video_source_rect, 
+		VdpOutputSurface destination_surface, 
+		VdpRect const *destination_rect, 
+		VdpRect const *destination_video_rect, 
+		uint32_t layer_count, 
+		VdpLayer const *layers)
+{
+	if (!(background_source_rect && video_surface_past && video_surface_future && video_source_rect && destination_rect && destination_video_rect && layers))	
+		return VDP_STATUS_INVALID_POINTER;
+
+	return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpVideoMixerSetAttributeValues (
+		VdpVideoMixer mixer, 
+		uint32_t attribute_count, 
+		VdpVideoMixerAttribute const *attributes, 
+		void const *const *attribute_values)
+{
+	if (!(attributes && attribute_values))	
+		return VDP_STATUS_INVALID_POINTER;
+	
+	vlVdpVideoMixer *vmixer = vlGetDataHTAB(mixer);
+	if (!vmixer)
+		return VDP_STATUS_INVALID_HANDLE;
+	
+	return VDP_STATUS_OK;
+}
\ No newline at end of file
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
index d88afb495f7..3c456a07ca1 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
@@ -73,6 +73,8 @@ vlVdpMPEG2BitstreamToMacroblock (
 	bool b_header_done = false;
 	struct vdpMPEG2BitstreamParser parser;
 	
+	debug_printf("[VDPAU] Starting decoding MPEG2 stream");
+	
 	num_macroblocks[0] = 0;
 	
 	memset(&parser,0,sizeof(parser));
diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index c5f06896c58..20097eaf98c 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -28,6 +28,7 @@
 #include "vdpau_private.h"
 #include <vdpau/vdpau.h>
 #include <util/u_debug.h>
+#include <util/u_memory.h>
 
 VdpStatus
 vlVdpOutputSurfaceCreate (	VdpDevice  device, 
@@ -35,9 +36,29 @@ vlVdpOutputSurfaceCreate (	VdpDevice  device,
 							uint32_t width, uint32_t height, 
 							VdpOutputSurface  *surface)
 {
+	vlVdpOutputSurface *vlsurface = NULL;
+	
 	debug_printf("[VDPAU] Creating output surface\n");
 	if (!(width && height))
 		return VDP_STATUS_INVALID_SIZE;
+	if (!surface)
+		return VDP_STATUS_INVALID_POINTER;
+	vlVdpDevice *dev = vlGetDataHTAB(device);
+	if (!dev)
+		return VDP_STATUS_INVALID_HANDLE;
+	vlsurface = CALLOC(1, sizeof(vlVdpOutputSurface));
+	if (!vlsurface)
+		return VDP_STATUS_RESOURCES;
+	vlsurface->device = dev;
+	vlsurface->width = width;
+	vlsurface->height = height;
+	vlsurface->format = FormatRGBAToPipe(rgba_format);
+	*surface = vlAddDataHTAB(vlsurface);
+	if (*surface == 0) {
+		/* Release only the new surface; dev is still referenced by its own handle */
+		FREE(vlsurface);
+		return VDP_STATUS_ERROR;
+	}
 	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+	return VDP_STATUS_OK;
 }
 }
\ No newline at end of file
diff --git a/src/gallium/state_trackers/vdpau/preemption.c b/src/gallium/state_trackers/vdpau/preemption.c
index e69de29bb2d..4572bdcfe6d 100644
--- a/src/gallium/state_trackers/vdpau/preemption.c
+++ b/src/gallium/state_trackers/vdpau/preemption.c
@@ -0,0 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ 
+ #include <vdpau/vdpau.h>
+ 
+ void vlVdpPreemptionCallback (VdpDevice device, void *context)
+ {
+	 /* TODO: Implement preemption */
+ }
+ 
+ VdpStatus vlVdpPreemptionCallbackRegister (VdpDevice device, VdpPreemptionCallback callback, void *context)
+ {
+	 /* Stub: callback and context are not stored; success is always reported */
+	 return VDP_STATUS_OK;
+ }
\ No newline at end of file
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 8200cf04326..5f545d0bb27 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -28,6 +28,7 @@
 #include "vdpau_private.h"
 #include <vdpau/vdpau.h>
 #include <util/u_debug.h>
+#include <util/u_memory.h>
 
 VdpStatus
 vlVdpPresentationQueueTargetDestroy (VdpPresentationQueueTarget  presentation_queue_target)
@@ -41,12 +42,39 @@ vlVdpPresentationQueueCreate (	VdpDevice  device,
 								VdpPresentationQueueTarget  presentation_queue_target, 
 								VdpPresentationQueue  *presentation_queue)
 {
-	debug_printf("[VDPAU] Creating presentation queue\n");
+	debug_printf("[VDPAU] Creating PresentationQueue\n");
+	VdpStatus    ret;
+	vlVdpPresentationQueue *pq = NULL;
 	
 	if (!presentation_queue)
 		return VDP_STATUS_INVALID_POINTER;
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+	  
+   vlVdpDevice *dev = vlGetDataHTAB(device);
+   if (!dev)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   vlVdpPresentationQueueTarget *pqt = vlGetDataHTAB(presentation_queue_target);
+   if (!pqt)
+	   return VDP_STATUS_INVALID_HANDLE;
+	   
+	if (dev != pqt->device)
+		return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
+
+   pq = CALLOC(1, sizeof(vlVdpPresentationQueue));
+   if (!pq)
+      return VDP_STATUS_RESOURCES;
+	  
+	*presentation_queue = vlAddDataHTAB(pq);
+   if (*presentation_queue == 0) {
+      ret = VDP_STATUS_ERROR;
+      goto no_handle;
+   }
+
+
+	return VDP_STATUS_OK;
+    no_handle:
+    FREE(pq);
+	return ret;
 }
 
 VdpStatus
diff --git a/src/gallium/state_trackers/vdpau/render.c b/src/gallium/state_trackers/vdpau/render.c
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 635d6c8acdb..36ef124c13d 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -30,6 +30,7 @@
 
 
 #include <vdpau/vdpau.h>
+#include <vdpau/vdpau_x11.h>
 #include <pipe/p_compiler.h>
 #include <pipe/p_video_context.h>
 #include <vl_winsys.h>
@@ -73,6 +74,7 @@ static VdpChromaType PipeToType(enum pipe_video_chroma_format pipe_type)
    return -1;
 }
 
+
 static enum pipe_format FormatToPipe(VdpYCbCrFormat vdpau_format)
 {
    switch (vdpau_format) {
@@ -95,6 +97,26 @@ static enum pipe_format FormatToPipe(VdpYCbCrFormat vdpau_format)
    return -1;
 }
 
+static enum pipe_format FormatRGBAToPipe(VdpRGBAFormat vdpau_format)
+{
+   switch (vdpau_format) {
+      case VDP_RGBA_FORMAT_A8:
+         return PIPE_FORMAT_A8_UNORM;
+      case VDP_RGBA_FORMAT_B10G10R10A2:
+         return PIPE_FORMAT_B10G10R10A2_UNORM;
+      case VDP_RGBA_FORMAT_B8G8R8A8:
+         return PIPE_FORMAT_B8G8R8A8_UNORM;
+      case VDP_RGBA_FORMAT_R10G10B10A2:
+         return PIPE_FORMAT_R10G10B10A2_UNORM;
+      case VDP_RGBA_FORMAT_R8G8B8A8:
+         return PIPE_FORMAT_R8G8B8A8_UNORM;
+      default:
+         assert(0);
+   }
+   /* Reached only for an unhandled format when assert() is compiled out */
+   return -1;
+}
+
 static VdpYCbCrFormat PipeToFormat(enum pipe_format p_format)
 {
    switch (p_format) {
@@ -145,6 +167,23 @@ typedef struct
    int screen;
 } vlVdpDevice;
 
+typedef struct
+{
+   vlVdpDevice *device;
+   Drawable drawable;
+} vlVdpPresentationQueueTarget;
+
+typedef struct
+{
+   vlVdpDevice *device;
+   Drawable drawable;
+} vlVdpPresentationQueue;
+
+typedef struct
+{
+	vlVdpDevice *device;
+} vlVdpVideoMixer;
+
 typedef struct
 {
    vlVdpDevice *device;
@@ -157,6 +196,14 @@ typedef struct
    uint8_t *data;
 } vlVdpSurface;
 
+typedef struct
+{
+   vlVdpDevice *device;
+   uint32_t width;
+   uint32_t height;
+   enum pipe_format format;
+} vlVdpOutputSurface;
+
 typedef struct
 {
 	vlVdpDevice *device;
@@ -174,6 +221,11 @@ vlHandle vlAddDataHTAB(void *data);
 void* vlGetDataHTAB(vlHandle handle);
 boolean vlGetFuncFTAB(VdpFuncId function_id, void **func);
 
+/* Public functions */
+VdpDeviceCreateX11 vdp_imp_device_create_x11;
+VdpPresentationQueueTargetCreateX11 vlVdpPresentationQueueTargetCreateX11;
+
+/* Internal function pointers */
 VdpGetErrorString vlVdpGetErrorString;
 VdpDeviceDestroy vlVdpDeviceDestroy;
 VdpGetProcAddress vlVdpGetProcAddress;
@@ -213,4 +265,13 @@ VdpPresentationQueueGetTime vlVdpPresentationQueueGetTime;
 VdpPresentationQueueDisplay vlVdpPresentationQueueDisplay;
 VdpPresentationQueueBlockUntilSurfaceIdle vlVdpPresentationQueueBlockUntilSurfaceIdle;
 VdpPresentationQueueQuerySurfaceStatus vlVdpPresentationQueueQuerySurfaceStatus;
+VdpPreemptionCallback vlVdpPreemptionCallback;
+VdpPreemptionCallbackRegister vlVdpPreemptionCallbackRegister;
+VdpVideoMixerSetFeatureEnables vlVdpVideoMixerSetFeatureEnables;
+VdpVideoMixerCreate vlVdpVideoMixerCreate;
+VdpVideoMixerRender vlVdpVideoMixerRender;
+VdpVideoMixerSetAttributeValues vlVdpVideoMixerSetAttributeValues;
+VdpGenerateCSCMatrix vlVdpGenerateCSCMatrix;
+
+
 #endif // VDPAU_PRIVATE_H
-- 
cgit v1.2.3


From bff1ac875c2c62ba5045bb953f800253c49361cb Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Thu, 7 Oct 2010 00:26:46 +0200
Subject: vl: some more fixes and addition to the decoder handling

---
 src/gallium/state_trackers/vdpau/decode.c        | 27 ++++++++++++++----------
 src/gallium/state_trackers/vdpau/device.c        | 10 +++++++++
 src/gallium/state_trackers/vdpau/mixer.c         |  5 +++++
 src/gallium/state_trackers/vdpau/vdpau_private.h |  8 ++++---
 4 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index f6003304668..1b49b4b2520 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -74,7 +74,10 @@ vlVdpDecoderCreate ( 	VdpDevice device,
 	// TODO: Define max_references. Used mainly for H264
 	
 	vldecoder->profile = p_profile;
+	vldecoder->height = height;
+	vldecoder->width = width;
 	vldecoder->device = dev;
+	vldecoder->vctx = NULL;
 		
 	*decoder = vlAddDataHTAB(vldecoder);
 	if (*decoder == 0) {
@@ -127,27 +130,27 @@ vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
 {
 	struct pipe_resource tmplt;
 	struct pipe_resource *surf_tex;
-	struct pipe_video_context *vpipe;
+	struct pipe_video_context *vctx;
 	
 	debug_printf("[VDPAU] Creating surface\n");
 		
 	if(!(vldecoder && vlsurf))
 		return VDP_STATUS_INVALID_POINTER;
 		
-	vpipe = vldecoder->vctx;
+	vctx = vldecoder->vctx;
 		
 	memset(&tmplt, 0, sizeof(struct pipe_resource));
 	tmplt.target = PIPE_TEXTURE_2D;
 	tmplt.format = vlsurf->format;
 	tmplt.last_level = 0;
-	if (vpipe->is_format_supported(vpipe, tmplt.format,
+	if (vctx->is_format_supported(vctx, tmplt.format,
                                   PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET,
                                   PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)) {
       tmplt.width0 = vlsurf->width;
       tmplt.height0 = vlsurf->height;
     }
     else {
-      assert(vpipe->is_format_supported(vpipe, tmplt.format,
+      assert(vctx->is_format_supported(vctx, tmplt.format,
                                        PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET,
                                        PIPE_TEXTURE_GEOM_NON_SQUARE));
       tmplt.width0 = util_next_power_of_two(vlsurf->width);
@@ -158,9 +161,9 @@ vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
 	tmplt.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
 	tmplt.flags = 0;
 	
-	surf_tex = vpipe->screen->resource_create(vpipe->screen, &tmplt);
+	surf_tex = vctx->screen->resource_create(vctx->screen, &tmplt);
 	
-	vlsurf->psurface = vpipe->screen->get_tex_surface(vpipe->screen, surf_tex, 0, 0, 0,
+	vlsurf->psurface = vctx->screen->get_tex_surface(vctx->screen, surf_tex, 0, 0, 0,
                                          PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
 										 
 	pipe_resource_reference(&surf_tex, NULL);
@@ -193,7 +196,6 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 	
 	debug_printf("[VDPAU] Decoding MPEG2\n");
 
-	vpipe = vldecoder->vctx->vpipe;
 	t_vdp_surf = vlsurf;
 	
 	/* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
@@ -218,6 +220,8 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 	
 	ret = vlVdpCreateSurfaceTarget(vldecoder,t_vdp_surf);
 
+	vpipe = vldecoder->vctx->vpipe;
+
 	if (vlVdpMPEG2BitstreamToMacroblock(vpipe->screen, bitstream_buffers, bitstream_buffer_count,
                      &num_macroblocks, &pipe_macroblocks))
 					 {
@@ -228,7 +232,7 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 					 }
 		
 	vpipe->set_decode_target(vpipe,t_surf);
-	//vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, (struct pipe_macroblock *)pipe_macroblocks, NULL);
+	vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, (struct pipe_macroblock *)pipe_macroblocks, NULL);
 	
 	skip_frame:
 	return ret;
@@ -263,14 +267,15 @@ vlVdpDecoderRender (VdpDecoder decoder,
 	if (vlsurf->device != vldecoder->device)
 		return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
 		
-	if (vlsurf->chroma_format != vldecoder->chroma_format)
-		return VDP_STATUS_INVALID_CHROMA_TYPE;
+	/* Test doesn't make sence */
+	/*if (vlsurf->chroma_format != vldecoder->chroma_format)
+		return VDP_STATUS_INVALID_CHROMA_TYPE;*/
 		
 	vscreen = vl_screen_create(vldecoder->device->display, vldecoder->device->screen);
 	if (!vscreen)
 		return VDP_STATUS_RESOURCES;
 	
-	vldecoder->vctx = vl_video_create(vscreen, vldecoder->profile, vlsurf->format, vlsurf->width, vlsurf->height);
+	vldecoder->vctx = vl_video_create(vscreen, vldecoder->profile, vlsurf->format, vldecoder->width, vldecoder->height);
 	if (!vldecoder->vctx)
 		return VDP_STATUS_RESOURCES;
 		
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 4ca198e874d..496e2b8def0 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 #include <pipe/p_compiler.h>
+#include <pipe/p_video_context.h>
 #include <vl_winsys.h>
 #include <util/u_memory.h>
 #include <util/u_debug.h>
@@ -51,8 +52,15 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGe
       ret = VDP_STATUS_RESOURCES;
       goto no_dev;
    }
+
    dev->display = display;
    dev->screen = screen;
+   dev->vscreen = vl_screen_create(display, screen);
+   if (!dev->vscreen)
+	   {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_vscreen;
+   }
 
    *device = vlAddDataHTAB(dev);
    if (*device == 0) {
@@ -66,6 +74,8 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGe
    return VDP_STATUS_OK;
 
 no_handle:
+   /* Destroy vscreen */
+no_vscreen:
    FREE(dev);
 no_dev:
    vlDestroyHTAB();
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index 8bf42f53ff2..124125ebaad 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -130,6 +130,11 @@ vlVdpVideoMixerSetAttributeValues (
 	vlVdpVideoMixer *vmixer = vlGetDataHTAB(mixer);
 	if (!vmixer)
 		return VDP_STATUS_INVALID_HANDLE;
+		
+	/*
+	 * TODO: Implement the function
+	 * 
+	 * */
 	
 	return VDP_STATUS_OK;
 }
\ No newline at end of file
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 36ef124c13d..de206365ce2 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -163,8 +163,9 @@ static enum pipe_video_profile ProfileToPipe(VdpDecoderProfile vdpau_profile)
 
 typedef struct
 {
-   void *display;
+   Display *display;
    int screen;
+   struct vl_screen *vscreen;
 } vlVdpDevice;
 
 typedef struct
@@ -207,10 +208,11 @@ typedef struct
 typedef struct
 {
 	vlVdpDevice *device;
-	struct vl_screen *vlscreen;
-    	struct vl_context *vctx;
+    struct vl_context *vctx;
 	enum pipe_video_chroma_format chroma_format;
 	enum pipe_video_profile profile;
+	uint32_t width;
+	uint32_t height;
 } vlVdpDecoder;
 
 typedef uint32_t vlHandle;
-- 
cgit v1.2.3


From 7d2bdc2d4db8321a72edcc921a0fcfa4e4d41ef9 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Fri, 8 Oct 2010 13:59:31 +0200
Subject: vl: bitstream decoder finds startcodes

---
 src/gallium/drivers/softpipe/sp_video_context.c       |  1 +
 src/gallium/state_trackers/vdpau/decode.c             | 19 +++++++++----------
 src/gallium/state_trackers/vdpau/ftab.c               |  2 +-
 .../state_trackers/vdpau/mpeg2_bitstream_parser.c     | 17 +++++++++--------
 .../state_trackers/vdpau/mpeg2_bitstream_parser.h     |  6 +++++-
 src/gallium/state_trackers/vdpau/surface.c            |  5 +++--
 src/gallium/state_trackers/vdpau/vdpau_private.h      |  1 -
 7 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 419ba946b89..a8c1b14428f 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -429,6 +429,7 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
    ctx->base.height = height;
 
    ctx->base.screen = pipe->screen;
+
    ctx->base.destroy = sp_mpeg12_destroy;
    ctx->base.get_param = sp_mpeg12_get_param;
    ctx->base.is_format_supported = sp_mpeg12_is_format_supported;
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 1b49b4b2520..5d3674c5eb2 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -40,10 +40,9 @@ vlVdpDecoderCreate ( 	VdpDevice device,
 						VdpDecoder *decoder 
 )
 {
-	struct vl_screen *vscreen;
-	enum pipe_video_profile p_profile;
-	VdpStatus ret;
-	vlVdpDecoder *vldecoder;
+	enum pipe_video_profile p_profile = PIPE_VIDEO_PROFILE_UNKNOWN;
+	VdpStatus ret = VDP_STATUS_OK;
+	vlVdpDecoder *vldecoder = NULL;
 	
 	debug_printf("[VDPAU] Creating decoder\n");
 	
@@ -137,12 +136,13 @@ vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
 	if(!(vldecoder && vlsurf))
 		return VDP_STATUS_INVALID_POINTER;
 		
-	vctx = vldecoder->vctx;
+	vctx = vldecoder->vctx->vpipe;
 		
 	memset(&tmplt, 0, sizeof(struct pipe_resource));
 	tmplt.target = PIPE_TEXTURE_2D;
-	tmplt.format = vlsurf->format;
+	tmplt.format = vctx->get_param(vctx,PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT);
 	tmplt.last_level = 0;
+
 	if (vctx->is_format_supported(vctx, tmplt.format,
                                   PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET,
                                   PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)) {
@@ -156,6 +156,7 @@ vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
       tmplt.width0 = util_next_power_of_two(vlsurf->width);
       tmplt.height0 = util_next_power_of_two(vlsurf->height);
     }
+	
 	tmplt.depth0 = 1;
 	tmplt.usage = PIPE_USAGE_DEFAULT;
 	tmplt.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
@@ -170,7 +171,7 @@ vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
 	
 	if (!vlsurf->psurface)
 		return VDP_STATUS_RESOURCES;
-	
+	debug_printf("[VDPAU] Done creating surface\n");
 	
 	return VDP_STATUS_OK;
 }
@@ -275,12 +276,10 @@ vlVdpDecoderRender (VdpDecoder decoder,
 	if (!vscreen)
 		return VDP_STATUS_RESOURCES;
 	
-	vldecoder->vctx = vl_video_create(vscreen, vldecoder->profile, vlsurf->format, vldecoder->width, vldecoder->height);
+	vldecoder->vctx = vl_video_create(vscreen, vldecoder->profile, vlsurf->chroma_format, vldecoder->width, vldecoder->height);
 	if (!vldecoder->vctx)
 		return VDP_STATUS_RESOURCES;
 		
-	vldecoder->vctx->vscreen = vscreen;
-		
     // TODO: Right now only mpeg2 is supported.
 	switch (vldecoder->vctx->vpipe->profile)   {
 		case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
diff --git a/src/gallium/state_trackers/vdpau/ftab.c b/src/gallium/state_trackers/vdpau/ftab.c
index 2142dcd4f6a..de08b810268 100644
--- a/src/gallium/state_trackers/vdpau/ftab.c
+++ b/src/gallium/state_trackers/vdpau/ftab.c
@@ -33,7 +33,7 @@ static void* ftab[67] =
    &vlVdpGetErrorString, /* VDP_FUNC_ID_GET_ERROR_STRING */
    &vlVdpGetProcAddress, /* VDP_FUNC_ID_GET_PROC_ADDRESS */
    &vlVdpGetApiVersion, /* VDP_FUNC_ID_GET_API_VERSION */
-   0x555,					/* DUMMY */
+   0x55,					/* DUMMY */
    &vlVdpGetInformationString, /* VDP_FUNC_ID_GET_INFORMATION_STRING */
    &vlVdpDeviceDestroy, /* VDP_FUNC_ID_DEVICE_DESTROY */
    &vlVdpGenerateCSCMatrix, /* VDP_FUNC_ID_GENERATE_CSC_MATRIX */
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
index 3c456a07ca1..436e7908e5b 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
@@ -30,8 +30,9 @@
 int
 vlVdpMPEG2NextStartCode(struct vdpMPEG2BitstreamParser *parser)
 {
-	uint32_t integer = 0;
-	uint32_t bytes_to_end;
+	uint32_t integer = 0xffffff00;
+	uint8_t * ptr_read = parser->ptr_bitstream;
+	int32_t bytes_to_end;
 	
 	/* Move cursor to the start of a byte */
 	while(parser->cursor % 8)
@@ -47,9 +48,9 @@ vlVdpMPEG2NextStartCode(struct vdpMPEG2BitstreamParser *parser)
 			parser->state = MPEG2_HEADER_DONE;
 			return 1;
 		}
-		
-		integer << 8;
-		integer = integer & (unsigned char)(parser->ptr_bitstream + parser->cursor/8)[0];
+		integer = ( integer | *ptr_read++ ) << 8;
+	
+		debug_printf("[VDPAU][Bitstream parser] Current read uint32_t: %08x .. Bytes to end: %d\n", integer,bytes_to_end);
 	
 		bytes_to_end--;
 		parser->cursor += 8;
@@ -57,7 +58,7 @@ vlVdpMPEG2NextStartCode(struct vdpMPEG2BitstreamParser *parser)
 	}
 	
 	/* start_code found. rewind cursor a byte */
-	parser->cursor -= 8;
+	//parser->cursor -= 8;
 	
 	return 0;
 }
@@ -89,8 +90,8 @@ vlVdpMPEG2BitstreamToMacroblock (
 		{
 		case MPEG2_HEADER_START_CODE:
 			if (vlVdpMPEG2NextStartCode(&parser))
-				continue;
-			
+				exit(1);
+			debug_printf("[VDPAU] START_CODE: %02x\n",(parser.ptr_bitstream + parser.cursor/8)[0]);
 			/* Start_code found */
 			switch ((parser.ptr_bitstream + parser.cursor/8)[0])
 			{
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
index 74a216a4d81..b7e778f780b 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
@@ -44,7 +44,11 @@ struct vdpMPEG2BitstreamParser
 	uint32_t cursor;                // current bit cursor
 	uint32_t cur_bitstream;
 	uint32_t cur_bitstream_length;
-	unsigned char *ptr_bitstream;
+	uint8_t *ptr_bitstream;
+	
+	/* The decoded bitstream goes here: */
+	/* Sequence_header_info */
+	uint32_t horizontal_size_value;
 };
 
 int
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index f957d94bdf7..9b6dac9c3f4 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -30,7 +30,6 @@
 #include <pipe/p_state.h>
 #include <util/u_memory.h>
 #include <util/u_format.h>
-#include <stdio.h>
 
 VdpStatus
 vlVdpVideoSurfaceCreate(VdpDevice device,
@@ -68,8 +67,10 @@ vlVdpVideoSurfaceCreate(VdpDevice device,
  goto inv_device;
  }
 
- p_surf->chroma_format = FormatToPipe(chroma_type);
+ p_surf->chroma_format = TypeToPipe(chroma_type);
  p_surf->device = dev;
+ p_surf->width = width;
+ p_surf->height = height;
 
  *surface = vlAddDataHTAB(p_surf);
  if (*surface == 0) {
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index de206365ce2..d582b8e6c29 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -192,7 +192,6 @@ typedef struct
    uint32_t height;
    uint32_t pitch;
    struct pipe_surface *psurface;
-   enum pipe_format format;
    enum pipe_video_chroma_format chroma_format;
    uint8_t *data;
 } vlVdpSurface;
-- 
cgit v1.2.3


From 2990292f0fdf36ae55c909da84f8927dc1aa9ae1 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Wed, 13 Oct 2010 11:27:07 +0200
Subject: vl: more work on the bitstream_parser

---
 .../state_trackers/vdpau/mpeg2_bitstream_parser.c  | 83 +++++++++++++++-------
 .../state_trackers/vdpau/mpeg2_bitstream_parser.h  | 18 +++--
 2 files changed, 70 insertions(+), 31 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
index 436e7908e5b..90936584893 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
@@ -24,7 +24,8 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  **************************************************************************/
-
+#include <stdio.h>
+#include <stdlib.h>
 #include "mpeg2_bitstream_parser.h"
 
 int
@@ -32,31 +33,24 @@ vlVdpMPEG2NextStartCode(struct vdpMPEG2BitstreamParser *parser)
 {
 	uint32_t integer = 0xffffff00;
 	uint8_t * ptr_read = parser->ptr_bitstream;
-	int32_t bytes_to_end;
-	
-	/* Move cursor to the start of a byte */
-	while(parser->cursor % 8)
-		parser->cursor++;
+	int8_t * bytes_to_end;
 		
-	bytes_to_end = parser->cur_bitstream_length - parser->cursor/8 - 1;
+	bytes_to_end = parser->ptr_bitstream_end - parser->ptr_bitstream;
 		
 	/* Read byte after byte, until startcode is found */
 	while(integer != 0x00000100)
 	{
-		if (bytes_to_end < 0)
+		if (bytes_to_end <= 0)
 		{
-			parser->state = MPEG2_HEADER_DONE;
-			return 1;
+			parser->state = MPEG2_BITSTREAM_DONE;
+			parser->code = 0;
+			return 0;
 		}
 		integer = ( integer | *ptr_read++ ) << 8;
-	
-		debug_printf("[VDPAU][Bitstream parser] Current read uint32_t: %08x .. Bytes to end: %d\n", integer,bytes_to_end);
-	
-		bytes_to_end--;
-		parser->cursor += 8;
-		
+		bytes_to_end--;	
 	}
-	
+	parser->ptr_bitstream = ptr_read;
+	parser->code = parser->ptr_bitstream;
 	/* start_code found. rewind cursor a byte */
 	//parser->cursor -= 8;
 	
@@ -74,37 +68,74 @@ vlVdpMPEG2BitstreamToMacroblock (
 	bool b_header_done = false;
 	struct vdpMPEG2BitstreamParser parser;
 	
-	debug_printf("[VDPAU] Starting decoding MPEG2 stream");
+	#if(1)
+	FILE *fp;
+   
+      if ((fp = fopen("binout", "w"))==NULL) {
+        printf("Cannot open file.\n");
+        exit(1);
+      }
+	fwrite(bitstream_buffers[0].bitstream, 1, bitstream_buffers[0].bitstream_bytes, fp);
+	fclose(fp);
+	
+	#endif
+	
+	
+	debug_printf("[VDPAU] Starting decoding MPEG2 stream\n");
 	
 	num_macroblocks[0] = 0;
 	
 	memset(&parser,0,sizeof(parser));
 	parser.state = MPEG2_HEADER_START_CODE;
-	parser.cur_bitstream_length = bitstream_buffers[0].bitstream_bytes;
 	parser.ptr_bitstream = (unsigned char *)bitstream_buffers[0].bitstream;
+	parser.ptr_bitstream_end = parser.ptr_bitstream + bitstream_buffers[0].bitstream_bytes;
 	
 	/* Main header parser loop */
 	while(!b_header_done)
 	{
 		switch (parser.state)
 		{
-		case MPEG2_HEADER_START_CODE:
+		case MPEG2_SEEK_HEADER:
 			if (vlVdpMPEG2NextStartCode(&parser))
 				exit(1);
-			debug_printf("[VDPAU] START_CODE: %02x\n",(parser.ptr_bitstream + parser.cursor/8)[0]);
+			break;
 			/* Start_code found */
-			switch ((parser.ptr_bitstream + parser.cursor/8)[0])
+			switch (parser.code)
 			{
 				/* sequence_header_code */
 				case 0xB3:
-				debug_printf("[VDPAU][Bitstream parser] Sequence header code found at cursor pos: %d\n", parser.cursor);
-				exit(1);
+				debug_printf("[VDPAU][Bitstream parser] Sequence header code found\n");
+				
+				/* We dont need to read this, because we already have this information */
+				break;
+				case 0xB5:
+				debug_printf("[VDPAU][Bitstream parser] Extension start code found\n");
+				//exit(1);
+				break;
+				
+				case 0xB8:
+				debug_printf("[VDPAU][Bitstream parser] Extension start code found\n");
+				//exit(1);
 				break;
+				
 			}
 		
 		break;
-		case MPEG2_HEADER_DONE:
-			debug_printf("[VDPAU][Bitstream parser] Done parsing current header\n");
+		case MPEG2_BITSTREAM_DONE:
+			if (parser.cur_bitstream < bitstream_buffer_count - 1)
+			{
+				debug_printf("[VDPAU][Bitstream parser] Done parsing current bitstream. Moving to the next\n");
+				parser.cur_bitstream++;
+				parser.ptr_bitstream = (unsigned char *)bitstream_buffers[parser.cur_bitstream].bitstream;
+				parser.ptr_bitstream_end = parser.ptr_bitstream + bitstream_buffers[parser.cur_bitstream].bitstream_bytes; 
+				parser.state = MPEG2_HEADER_START_CODE;
+			}
+			else
+			{
+				debug_printf("[VDPAU][Bitstream parser] Done with frame\n");
+				exit(0);
+				// return 0;
+			}
 		break;
 		
 		}
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
index b7e778f780b..414d6597c6c 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
@@ -34,17 +34,25 @@
 
 enum vdpMPEG2States
 {
-	MPEG2_HEADER_START_CODE,
-	MPEG2_HEADER_DONE
+	MPEG2_SEEK_HEADER,
+	MPEG2_HEADER_DONE,
+	MPEG2_BITSTREAM_DONE
+	MPEG2
+};
+
+enum vdpMPEG2Action
+{
+	MPEG2_
 };
 
 struct vdpMPEG2BitstreamParser
 {
 	enum vdpMPEG2States state;
-	uint32_t cursor;                // current bit cursor
+	enum vdpMPEG2Actions action;
 	uint32_t cur_bitstream;
-	uint32_t cur_bitstream_length;
-	uint8_t *ptr_bitstream;
+	const uint8_t *ptr_bitstream_end;
+	const uint8_t *ptr_bitstream;
+	uint8_t code;
 	
 	/* The decoded bitstream goes here: */
 	/* Sequence_header_info */
-- 
cgit v1.2.3


From b13a0af510b71e86f9634dc959dc355ea7044674 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 16 Oct 2010 16:41:09 +0200
Subject: Fix problems created by Merge

---
 src/gallium/auxiliary/vl/vl_compositor.c         |  3 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 29 ++++++++---------
 src/gallium/drivers/softpipe/sp_video_context.c  | 40 +++++++++++++++---------
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c       |  2 +-
 4 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 0640b1a4565..01d09602558 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 #include "vl_compositor.h"
+#include "util/u_draw.h"
 #include <assert.h>
 #include <pipe/p_context.h>
 #include <util/u_inlines.h>
@@ -555,7 +556,7 @@ static void draw_layers(struct vl_compositor *c,
 
       c->pipe->bind_fs_state(c->pipe, frag_shaders[i]);
       c->pipe->set_fragment_sampler_views(c->pipe, 1, &surface_view);
-      c->pipe->draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
+      util_draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
 
       if (delete_view) {
          pipe_sampler_view_reference(&surface_view, NULL);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index e9024e4a409..855841722c4 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 #include "vl_mpeg12_mc_renderer.h"
+#include "util/u_draw.h"
 #include <assert.h>
 #include <pipe/p_context.h>
 #include <util/u_inlines.h>
@@ -1074,8 +1075,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->i_vs);
       r->pipe->bind_fs_state(r->pipe, r->i_fs);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
+      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
       vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
    }
 
@@ -1089,8 +1090,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
+      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
       vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
    }
 
@@ -1104,8 +1105,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
+      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
       vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
    }
 
@@ -1119,8 +1120,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
+      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
    }
 
@@ -1134,8 +1135,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
+      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
    }
 
@@ -1151,8 +1152,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
       r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
+      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
    }
 
@@ -1168,8 +1169,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
       r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
 
-      r->pipe->draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                           num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
+      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
    }
 
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 44df00e0b78..11082109cfc 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -33,6 +33,7 @@
 #include <util/u_memory.h>
 #include <util/u_rect.h>
 #include <util/u_video.h>
+#include <util/u_surface.h>
 #include "sp_public.h"
 #include "sp_texture.h"
 
@@ -97,8 +98,8 @@ sp_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
    if (geom & PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)
       return FALSE;
 
-   return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, PIPE_TEXTURE_2D,
-                                                 format, usage, geom);
+   return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, format, PIPE_TEXTURE_2D,
+                                                 0, usage, geom);
 }
 
 static void
@@ -132,14 +133,15 @@ sp_mpeg12_surface_fill(struct pipe_video_context *vpipe,
                        unsigned value)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
+   float rgba[4] = { 0, 0, 0, 0 };
 
    assert(vpipe);
    assert(dst);
 
-   if (ctx->pipe->surface_fill)
-      ctx->pipe->surface_fill(ctx->pipe, dst, dstx, dsty, width, height, value);
+   if (ctx->pipe->clear_render_target)
+      ctx->pipe->clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
    else
-      util_surface_fill(ctx->pipe, dst, dstx, dsty, width, height, value);
+      util_clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
 }
 
 static void
@@ -155,10 +157,20 @@ sp_mpeg12_surface_copy(struct pipe_video_context *vpipe,
    assert(vpipe);
    assert(dst);
 
-   if (ctx->pipe->surface_copy)
-      ctx->pipe->surface_copy(ctx->pipe, dst, dstx, dsty, src, srcx, srcy, width, height);
+   struct pipe_subresource subdst, subsrc;
+   subdst.face = dst->face;
+   subdst.level = dst->level;
+   subsrc.face = src->face;
+   subsrc.level = src->level;
+
+   if (ctx->pipe->resource_copy_region)
+      ctx->pipe->resource_copy_region(ctx->pipe, dst->texture, subdst, dstx, dsty, dst->zslice,
+                                      src->texture, subsrc, srcx, srcy, src->zslice,
+                                      width, height);
    else
-      util_surface_copy(ctx->pipe, FALSE, dst, dstx, dsty, src, srcx, srcy, width, height);
+      util_resource_copy_region(ctx->pipe, dst->texture, subdst, dstx, dsty, dst->zslice,
+                                src->texture, subsrc, srcx, srcy, src->zslice,
+                                width, height);
 }
 
 static struct pipe_transfer*
@@ -339,12 +351,12 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    rast.flatshade = 1;
    rast.flatshade_first = 0;
    rast.light_twoside = 0;
-   rast.front_winding = PIPE_WINDING_CCW;
-   rast.cull_mode = PIPE_WINDING_CW;
-   rast.fill_cw = PIPE_POLYGON_MODE_FILL;
-   rast.fill_ccw = PIPE_POLYGON_MODE_FILL;
-   rast.offset_cw = 0;
-   rast.offset_ccw = 0;
+   rast.front_ccw = 1;
+   rast.cull_face = PIPE_FACE_NONE;
+   rast.fill_back = PIPE_POLYGON_MODE_FILL;
+   rast.fill_front = PIPE_POLYGON_MODE_FILL;
+   rast.offset_point = 0;
+   rast.offset_line = 0;
    rast.scissor = 0;
    rast.poly_smooth = 0;
    rast.poly_stipple_enable = 0;
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 0a7f324a77c..cc80583f088 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -82,7 +82,7 @@ vl_drawable_surface_get(struct vl_screen *vscreen, Drawable drawable)
    templat.height0 = height;
    templat.depth0 = 1;
    templat.usage = PIPE_USAGE_DEFAULT;
-   templat.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_BLIT_SOURCE;
+   templat.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET;
    templat.flags = 0;
 
    drawable_tex = vscreen->pscreen->resource_create(vscreen->pscreen, &templat);
-- 
cgit v1.2.3


From ba9caba9c8b8d469dbdff677274d574b9051bc45 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 23 Oct 2010 13:54:47 +0200
Subject: Some more merge fixes

---
 src/gallium/winsys/g3dvl/dri/dri_winsys.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index 06631840f78..4d10e27c580 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -32,13 +32,12 @@
 #include <util/u_memory.h>
 #include <util/u_hash.h>
 #include <util/u_hash_table.h>
-#include <state_tracker/drm_api.h>
+#include <state_tracker/drm_driver.h>
 #include <X11/Xlibint.h>
 
 struct vl_dri_screen
 {
    struct vl_screen base;
-   struct drm_api *api;
    dri_screen_t *dri_screen;
    struct util_hash_table *drawable_table;
    Drawable last_seen_drawable;
@@ -176,7 +175,6 @@ struct vl_screen*
 vl_screen_create(Display *display, int screen)
 {
    struct vl_dri_screen *vl_dri_scrn;
-   struct drm_create_screen_arg arg;
 
    assert(display);
 
@@ -187,15 +185,7 @@ vl_screen_create(Display *display, int screen)
    if (dri2CreateScreen(display, screen, &vl_dri_scrn->dri_screen))
       goto no_dri2screen;
 
-   vl_dri_scrn->api = drm_api_create();
-   if (!vl_dri_scrn->api)
-      goto no_drmapi;
-
-   arg.mode = DRM_CREATE_NORMAL;
-
-   vl_dri_scrn->base.pscreen = vl_dri_scrn->api->create_screen(vl_dri_scrn->api,
-                                                               vl_dri_scrn->dri_screen->fd,
-                                                               &arg);
+   vl_dri_scrn->base.pscreen = driver_descriptor.create_screen(vl_dri_scrn->dri_screen->fd);
 
    if (!vl_dri_scrn->base.pscreen)
       goto no_pscreen;
@@ -212,8 +202,6 @@ vl_screen_create(Display *display, int screen)
 no_hash:
    vl_dri_scrn->base.pscreen->destroy(vl_dri_scrn->base.pscreen);
 no_pscreen:
-   vl_dri_scrn->api->destroy(vl_dri_scrn->api);
-no_drmapi:
    dri2DestroyScreen(vl_dri_scrn->dri_screen);
 no_dri2screen:
    FREE(vl_dri_scrn);
@@ -230,8 +218,6 @@ void vl_screen_destroy(struct vl_screen *vscreen)
    util_hash_table_foreach(vl_dri_scrn->drawable_table, drawable_destroy, vl_dri_scrn);
    util_hash_table_destroy(vl_dri_scrn->drawable_table);
    vl_dri_scrn->base.pscreen->destroy(vl_dri_scrn->base.pscreen);
-   if (vl_dri_scrn->api->destroy)
-      vl_dri_scrn->api->destroy(vl_dri_scrn->api);
    dri2DestroyScreen(vl_dri_scrn->dri_screen);
    FREE(vl_dri_scrn);
 }
-- 
cgit v1.2.3


From de4c2b91f4dd2408422bcbdbf3e9cf3897533e6d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 23 Oct 2010 14:01:13 +0200
Subject: Fix uninitialized memory problems

---
 src/gallium/auxiliary/vl/vl_compositor.c         | 1 +
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 3 +++
 src/gallium/drivers/softpipe/sp_video_context.c  | 3 +++
 3 files changed, 7 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 01d09602558..1a05f369d9b 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -177,6 +177,7 @@ init_pipe_state(struct vl_compositor *c)
    c->fb_state.nr_cbufs = 1;
    c->fb_state.zsbuf = NULL;
 
+   memset(&sampler, 0, sizeof(sampler));
    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 855841722c4..b6f45048a36 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -458,6 +458,7 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    filters[4] = PIPE_TEX_FILTER_LINEAR;
 
    for (i = 0; i < 5; ++i) {
+      memset(&sampler, 0, sizeof(sampler));
       sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
       sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
       sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
@@ -604,6 +605,8 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       );
    }
 
+   memset(&vertex_elems, 0, sizeof(vertex_elems));
+
    /* Position element */
    vertex_elems[0].src_offset = 0;
    vertex_elems[0].instance_divisor = 0;
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 11082109cfc..ff217d66c85 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -348,6 +348,7 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
 
    assert(ctx);
 
+   memset(&rast, 0, sizeof rast);
    rast.flatshade = 1;
    rast.flatshade_first = 0;
    rast.light_twoside = 0;
@@ -378,6 +379,7 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
    ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
 
+   memset(&blend, 0, sizeof blend);
    blend.independent_blend_enable = 0;
    blend.rt[0].blend_enable = 0;
    blend.rt[0].rgb_func = PIPE_BLEND_ADD;
@@ -394,6 +396,7 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
    ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
 
+   memset(&dsa, 0, sizeof dsa);
    dsa.depth.enabled = 0;
    dsa.depth.writemask = 0;
    dsa.depth.func = PIPE_FUNC_ALWAYS;
-- 
cgit v1.2.3


From b122e50c3eabf157f8b7a3647590a37abd276c5c Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Sat, 23 Oct 2010 15:59:45 +0200
Subject: vl: initial va-api implementation

---
 src/gallium/state_trackers/va/Makefile     |  19 +++++
 src/gallium/state_trackers/va/ftab.c       | 131 +++++++++++++++++++++++++++++
 src/gallium/state_trackers/va/htab.c       |  94 +++++++++++++++++++++
 src/gallium/state_trackers/va/va_private.h |   0
 4 files changed, 244 insertions(+)
 create mode 100644 src/gallium/state_trackers/va/Makefile
 create mode 100644 src/gallium/state_trackers/va/ftab.c
 create mode 100644 src/gallium/state_trackers/va/htab.c
 create mode 100644 src/gallium/state_trackers/va/va_private.h

diff --git a/src/gallium/state_trackers/va/Makefile b/src/gallium/state_trackers/va/Makefile
new file mode 100644
index 00000000000..28fe5d09694
--- /dev/null
+++ b/src/gallium/state_trackers/va/Makefile
@@ -0,0 +1,19 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = vatracker
+
+VA_MAJOR = 0
+VA_MINOR = 3
+LIBRARY_DEFINES = -DVER_MAJOR=$(VA_MAJOR) -DVER_MINOR=$(VA_MINOR) $(STATE_TRACKER_DEFINES)
+
+LIBRARY_INCLUDES = \
+	$(shell pkg-config --cflags-only-I libva) \
+	-I$(TOP)/src/gallium/winsys/g3dvl
+
+C_SOURCES = htab.c \
+	    ftab.c 
+
+
+include ../../Makefile.template
+
diff --git a/src/gallium/state_trackers/va/ftab.c b/src/gallium/state_trackers/va/ftab.c
new file mode 100644
index 00000000000..694390b3464
--- /dev/null
+++ b/src/gallium/state_trackers/va/ftab.c
@@ -0,0 +1,132 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+#include <va/va_backend.h>
+
+/*
+ * VA driver vtable.  Entries are positional, so their order must follow the
+ * member order of struct VADriverVTable; every entry point is still unset.
+ */
+const struct VADriverVTable vtable =
+{
+   0, /* VAStatus (*vaTerminate) ( VADriverContextP ctx ); */
+   0, /* VAStatus (*vaQueryConfigProfiles) ( VADriverContextP ctx, VAProfile *profile_list,int *num_profiles); */
+   0, /* VAStatus (*vaQueryConfigEntrypoints) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint *entrypoint_list, int *num_entrypoints ); */
+   0, /* VAStatus (*vaGetConfigAttributes) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs ); */
+   0, /* VAStatus (*vaCreateConfig) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id); */
+   0, /* VAStatus (*vaDestroyConfig) ( VADriverContextP ctx, VAConfigID config_id); */
+   0, /* VAStatus (*vaQueryConfigAttributes) ( VADriverContextP ctx, VAConfigID config_id, VAProfile *profile, VAEntrypoint *entrypoint, VAConfigAttrib *attrib_list, int *num_attribs); */
+   0, /* VAStatus (*vaCreateSurfaces) ( VADriverContextP ctx,int width,int height,int format,int num_surfaces,VASurfaceID *surfaces); */
+   0, /* VAStatus (*vaDestroySurfaces) ( VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces ); */
+   0, /* VAStatus (*vaCreateContext) (VADriverContextP ctx,VAConfigID config_id,int picture_width,int picture_height,int flag,VASurfaceID *render_targets,int num_render_targets,VAContextID *context); */
+   0, /* VAStatus (*vaDestroyContext) (VADriverContextP ctx,VAContextID context); */
+   0, /* VAStatus (*vaCreateBuffer) (VADriverContextP ctx,VAContextID context,VABufferType type,unsigned int size,unsigned int num_elements,void *data,VABufferID *buf_id); */
+   0, /* VAStatus (*vaBufferSetNumElements) (VADriverContextP ctx,VABufferID buf_id,unsigned int num_elements); */
+   0, /* VAStatus (*vaMapBuffer) (VADriverContextP ctx,VABufferID buf_id,void **pbuf); */
+   0, /* VAStatus (*vaUnmapBuffer) (VADriverContextP ctx,VABufferID buf_id); */
+   0, /* VAStatus (*vaDestroyBuffer) (VADriverContextP ctx,VABufferID buffer_id); */
+   0, /* VAStatus (*vaBeginPicture) (VADriverContextP ctx,VAContextID context,VASurfaceID render_target); */
+   0, /* VAStatus (*vaRenderPicture) (VADriverContextP ctx,VAContextID context,VABufferID *buffers,int num_buffers); */
+   0, /* VAStatus (*vaEndPicture) (VADriverContextP ctx,VAContextID context); */
+   0, /* VAStatus (*vaSyncSurface) (VADriverContextP ctx,VASurfaceID render_target); */
+   0, /* VAStatus (*vaQuerySurfaceStatus) (VADriverContextP ctx,VASurfaceID render_target,VASurfaceStatus *status); */
+   0, /* VAStatus (*vaPutSurface) (
+         VADriverContextP ctx,
+         VASurfaceID surface,
+         void* draw,
+         short srcx,
+         short srcy,
+         unsigned short srcw,
+         unsigned short srch,
+         short destx,
+         short desty,
+         unsigned short destw,
+         unsigned short desth,
+         VARectangle *cliprects,
+         unsigned int number_cliprects,
+         unsigned int flags); */
+   0, /* VAStatus (*vaQueryImageFormats) ( VADriverContextP ctx, VAImageFormat *format_list,int *num_formats); */
+   0, /* VAStatus (*vaCreateImage) (VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image); */
+   0, /* VAStatus (*vaDeriveImage) (VADriverContextP ctx,VASurfaceID surface,VAImage *image); */
+   0, /* VAStatus (*vaDestroyImage) (VADriverContextP ctx,VAImageID image); */
+   0, /* VAStatus (*vaSetImagePalette) (VADriverContextP ctx,VAImageID image, unsigned char *palette); */
+   0, /* VAStatus (*vaGetImage) (VADriverContextP ctx,VASurfaceID surface,int x,int y,unsigned int width,unsigned int height,VAImageID image); */
+   0, /* VAStatus (*vaPutImage) (
+         VADriverContextP ctx,
+         VASurfaceID surface,
+         VAImageID image,
+         int src_x,
+         int src_y,
+         unsigned int src_width,
+         unsigned int src_height,
+         int dest_x,
+         int dest_y,
+         unsigned int dest_width,
+         unsigned int dest_height
+      ); */
+   0, /* VAStatus (*vaQuerySubpictureFormats) (VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats); */
+   0, /* VAStatus (*vaCreateSubpicture) (VADriverContextP ctx,VAImageID image,VASubpictureID *subpicture); */
+   0, /* VAStatus (*vaDestroySubpicture) (VADriverContextP ctx,VASubpictureID subpicture); */
+   0, /* VAStatus (*vaSetSubpictureImage) (VADriverContextP ctx,VASubpictureID subpicture,VAImageID image); */
+   0, /* VAStatus (*vaSetSubpictureChromakey) (VADriverContextP ctx,VASubpictureID subpicture,unsigned int chromakey_min,unsigned int chromakey_max,unsigned int chromakey_mask); */
+   0, /* VAStatus (*vaSetSubpictureGlobalAlpha) (VADriverContextP ctx,VASubpictureID subpicture,float global_alpha); */
+   0, /* VAStatus (*vaAssociateSubpicture) (
+         VADriverContextP ctx,
+         VASubpictureID subpicture,
+         VASurfaceID *target_surfaces,
+         int num_surfaces,
+         short src_x,
+         short src_y,
+         unsigned short src_width,
+         unsigned short src_height,
+         short dest_x,
+         short dest_y,
+         unsigned short dest_width,
+         unsigned short dest_height,
+         unsigned int flags); */
+   0, /* VAStatus (*vaDeassociateSubpicture) (VADriverContextP ctx,VASubpictureID subpicture,VASurfaceID *target_surfaces,int num_surfaces); */
+   0, /* VAStatus (*vaQueryDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int *num_attributes); */
+   0, /* VAStatus (*vaGetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
+   0, /* VAStatus (*vaSetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
+   0, /* VAStatus (*vaBufferInfo) (VADriverContextP ctx,VAContextID context,VABufferID buf_id,VABufferType *type,unsigned int *size,unsigned int *num_elements); */
+   0, /* VAStatus (*vaLockSurface) (
+         VADriverContextP ctx,
+         VASurfaceID surface,
+         unsigned int *fourcc,
+         unsigned int *luma_stride,
+         unsigned int *chroma_u_stride,
+         unsigned int *chroma_v_stride,
+         unsigned int *luma_offset,
+         unsigned int *chroma_u_offset,
+         unsigned int *chroma_v_offset,
+         unsigned int *buffer_name,
+         void **buffer); */
+   0, /* VAStatus (*vaUnlockSurface) (VADriverContextP ctx,VASurfaceID surface); */
+   0 /* struct VADriverVTableGLX *glx; "Optional" */
+};
+
diff --git a/src/gallium/state_trackers/va/htab.c b/src/gallium/state_trackers/va/htab.c
new file mode 100644
index 00000000000..7b7c111a4be
--- /dev/null
+++ b/src/gallium/state_trackers/va/htab.c
@@ -0,0 +1,94 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <util/u_handle_table.h>
+#include <os/os_thread.h>
+#include "va_private.h"
+
+#ifdef VL_HANDLES
+static struct handle_table *htab = NULL;
+pipe_static_mutex(htab_lock);
+#endif
+
+boolean vlCreateHTAB(void)
+{
+#ifdef VL_HANDLES
+   boolean ret;
+   /* Make sure handle table handles fit in a vlHandle. */
+   assert(sizeof(unsigned) <= sizeof(vlHandle));
+   pipe_mutex_lock(htab_lock);
+   if (!htab)
+      htab = handle_table_create();
+   ret = htab != NULL;
+   pipe_mutex_unlock(htab_lock);
+   return ret;
+#else
+   return TRUE;
+#endif
+}
+
+void vlDestroyHTAB(void)
+{
+#ifdef VL_HANDLES
+   pipe_mutex_lock(htab_lock);
+   if (htab) {
+      handle_table_destroy(htab);
+      htab = NULL;
+   }
+   pipe_mutex_unlock(htab_lock);
+#endif
+}
+
+vlHandle vlAddDataHTAB(void *data)
+{
+   assert(data);
+#ifdef VL_HANDLES
+   vlHandle handle = 0;
+   pipe_mutex_lock(htab_lock);
+   if (htab)
+      handle = handle_table_add(htab, data);
+   pipe_mutex_unlock(htab_lock);
+   return handle;
+#else
+   return (vlHandle)data;
+#endif
+}
+
+void* vlGetDataHTAB(vlHandle handle)
+{
+   assert(handle);
+#ifdef VL_HANDLES
+   void *data = NULL;
+   pipe_mutex_lock(htab_lock);
+   if (htab)
+      data = handle_table_get(htab, handle);
+   pipe_mutex_unlock(htab_lock);
+   return data;
+#else
+   return (void*)handle;
+#endif
+}
diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h
new file mode 100644
index 00000000000..e69de29bb2d
-- 
cgit v1.2.3


From 4381580936f07e0c5eb0ad7c51d2ccf765000d08 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 23 Oct 2010 17:47:30 +0200
Subject: First xvmc-r600 implementation

---
 configure.ac                                     |   2 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |   2 +-
 src/gallium/drivers/r600/Makefile                |   1 +
 src/gallium/drivers/r600/r600_pipe.c             |   2 +
 src/gallium/drivers/r600/r600_video_context.c    | 312 +++++++++++++++++++++++
 src/gallium/drivers/r600/r600_video_context.h    |  11 +
 src/gallium/targets/Makefile.xvmc                |   1 +
 src/gallium/targets/xvmc-r600/Makefile           |  23 ++
 src/gallium/targets/xvmc-r600/target.c           |  24 ++
 9 files changed, 376 insertions(+), 2 deletions(-)
 create mode 100644 src/gallium/drivers/r600/r600_video_context.c
 create mode 100644 src/gallium/drivers/r600/r600_video_context.h
 create mode 100644 src/gallium/targets/xvmc-r600/Makefile
 create mode 100644 src/gallium/targets/xvmc-r600/target.c

diff --git a/configure.ac b/configure.ac
index dec8a2674b9..b9ece4e83e0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1570,7 +1570,7 @@ AC_ARG_ENABLE([gallium-r600],
 if test "x$enable_gallium_r600" = xyes; then
     if test "x$HAVE_LIBDRM_RADEON" = xyes; then
 	GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
-	gallium_check_st "r600/drm" "dri-r600"
+	gallium_check_st "r600/drm" "dri-r600" "xvmc-r600"
     else
 	AC_MSG_ERROR([libdrm_radeon is missing, cannot build gallium-r600])
     fi
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index b6f45048a36..1de17dd6239 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -278,7 +278,7 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
    for (i = 0; i < 4; ++i)
       vtex[i] = ureg_DECL_vs_input(shader, i + 1);
    /* Skip input 5 */
-   vtex[4] = ureg_DECL_vs_input(shader, 6);
+   vtex[4] = ureg_DECL_vs_input(shader, 5);
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
    for (i = 0; i < 5; ++i)
       o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index ede0bb2ec45..0dc9663de6a 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -17,6 +17,7 @@ C_SOURCES = \
 	r600_shader.c \
 	r600_state.c \
 	r600_texture.c \
+	r600_video_context.c \
 	r700_asm.c \
 	evergreen_state.c \
 	eg_asm.c
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 0589652f705..a9fc31d9fbd 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -43,6 +43,7 @@
 #include "r600_shader.h"
 #include "r600_pipe.h"
 #include "r600_state_inlines.h"
+#include "r600_video_context.h"
 
 /*
  * pipe_context
@@ -425,6 +426,7 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon)
 	rscreen->screen.get_paramf = r600_get_paramf;
 	rscreen->screen.is_format_supported = r600_is_format_supported;
 	rscreen->screen.context_create = r600_create_context;
+	rscreen->screen.video_context_create = r600_video_create;
 	r600_init_screen_texture_functions(&rscreen->screen);
 	r600_init_screen_resource_functions(&rscreen->screen);
 
diff --git a/src/gallium/drivers/r600/r600_video_context.c b/src/gallium/drivers/r600/r600_video_context.c
new file mode 100644
index 00000000000..9f3e0dc147c
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_video_context.c
@@ -0,0 +1,312 @@
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+#include <pipe/p_defines.h>
+#include <pipe/p_context.h>
+#include <pipe/p_screen.h>
+#include <util/u_memory.h>
+#include <X11/Xlib.h>
+
+#include <fcntl.h>
+
+#include "softpipe/sp_texture.h"
+
+#include "r600_video_context.h"
+#include <softpipe/sp_video_context.h>
+
+#if 0
+
+static void r600_mpeg12_destroy(struct pipe_video_context *vpipe)
+{
+    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
+
+    assert(vpipe);
+
+    ctx->pipe->bind_vs_state(ctx->pipe, NULL);
+    ctx->pipe->bind_fs_state(ctx->pipe, NULL);
+
+    ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend);
+    ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast);
+    ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
+
+    pipe_video_surface_reference(&ctx->decode_target, NULL);
+    vl_compositor_cleanup(&ctx->compositor);
+    vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+    ctx->pipe->destroy(ctx->pipe);
+
+    FREE(ctx);
+}
+
+static void
+r600_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
+                               struct pipe_video_surface *past,
+                               struct pipe_video_surface *future,
+                               unsigned num_macroblocks,
+                               struct pipe_macroblock *macroblocks,
+                               struct pipe_fence_handle **fence)
+{
+    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
+    struct pipe_mpeg12_macroblock *mpeg12_macroblocks =
+                         (struct pipe_mpeg12_macroblock*)macroblocks;
+
+    assert(vpipe);
+    assert(num_macroblocks);
+    assert(macroblocks);
+    assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
+    assert(ctx->decode_target);
+
+    vl_mpeg12_mc_renderer_render_macroblocks(
+                            &ctx->mc_renderer,
+                            r600_video_surface(ctx->decode_target)->tex,
+                            past ? r600_video_surface(past)->tex : NULL,
+                            future ? r600_video_surface(future)->tex : NULL,
+                            num_macroblocks, mpeg12_macroblocks, fence);
+}
+
+static void r600_mpeg12_clear_surface(struct pipe_video_context *vpipe,
+                                      unsigned x, unsigned y,
+                                      unsigned width, unsigned height,
+                                      unsigned value,
+                                      struct pipe_surface *surface)
+{
+    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
+
+    assert(vpipe);
+    assert(surface);
+
+    if (ctx->pipe->surface_fill)
+        ctx->pipe->surface_fill(ctx->pipe, surface, x, y, width, height, value);
+    else
+        util_surface_fill(ctx->pipe, surface, x, y, width, height, value);
+}
+
+static void
+r600_mpeg12_render_picture(struct pipe_video_context     *vpipe,
+                           struct pipe_video_surface     *src_surface,
+                           enum pipe_mpeg12_picture_type picture_type,
+                           struct pipe_video_rect        *src_area,
+                           struct pipe_surface           *dst_surface,
+                           struct pipe_video_rect        *dst_area,
+                           struct pipe_fence_handle      **fence)
+{
+    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
+
+    assert(vpipe);
+    assert(src_surface);
+    assert(src_area);
+    assert(dst_surface);
+    assert(dst_area);
+
+    vl_compositor_render(&ctx->compositor,
+                         r600_video_surface(src_surface)->tex,
+                         picture_type, src_area, dst_surface->texture,
+                         dst_area, fence);
+}
+
+static void r600_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
+                                          struct pipe_video_surface *dt)
+{
+    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
+
+    assert(vpipe);
+    assert(dt);
+
+    pipe_video_surface_reference(&ctx->decode_target, dt);
+}
+
+static void r600_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe,
+                                       const float *mat)
+{
+    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
+
+    assert(vpipe);
+
+    vl_compositor_set_csc_matrix(&ctx->compositor, mat);
+}
+
+static bool r600_mpeg12_init_pipe_state(struct radeon_mpeg12_context *ctx)
+{
+    struct pipe_rasterizer_state rast;
+    struct pipe_blend_state blend;
+    struct pipe_depth_stencil_alpha_state dsa;
+    unsigned i;
+
+    assert(ctx);
+
+    rast.flatshade = 1;
+    rast.flatshade_first = 0;
+    rast.light_twoside = 0;
+    rast.front_winding = PIPE_WINDING_CCW;
+    rast.cull_mode = PIPE_WINDING_CW;
+    rast.fill_cw = PIPE_POLYGON_MODE_FILL;
+    rast.fill_ccw = PIPE_POLYGON_MODE_FILL;
+    rast.offset_cw = 0;
+    rast.offset_ccw = 0;
+    rast.scissor = 0;
+    rast.poly_smooth = 0;
+    rast.poly_stipple_enable = 0;
+    rast.point_sprite = 0;
+    rast.point_size_per_vertex = 0;
+    rast.multisample = 0;
+    rast.line_smooth = 0;
+    rast.line_stipple_enable = 0;
+    rast.line_stipple_factor = 0;
+    rast.line_stipple_pattern = 0;
+    rast.line_last_pixel = 0;
+    rast.bypass_vs_clip_and_viewport = 0;
+    rast.line_width = 1;
+    rast.point_smooth = 0;
+    rast.point_size = 1;
+    rast.offset_units = 1;
+    rast.offset_scale = 1;
+    /*rast.sprite_coord_mode[i] = ;*/
+    ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
+    ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
+
+    blend.blend_enable = 0;
+    blend.rgb_func = PIPE_BLEND_ADD;
+    blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+    blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+    blend.alpha_func = PIPE_BLEND_ADD;
+    blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+    blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+    blend.logicop_enable = 0;
+    blend.logicop_func = PIPE_LOGICOP_CLEAR;
+    /* Needed to allow color writes to FB, even if blending disabled */
+    blend.colormask = PIPE_MASK_RGBA;
+    blend.dither = 0;
+    ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
+    ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
+
+    dsa.depth.enabled = 0;
+    dsa.depth.writemask = 0;
+    dsa.depth.func = PIPE_FUNC_ALWAYS;
+    for (i = 0; i < 2; ++i)
+    {
+        dsa.stencil[i].enabled = 0;
+        dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
+        dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
+        dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
+        dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
+        dsa.stencil[i].ref_value = 0;
+        dsa.stencil[i].valuemask = 0;
+        dsa.stencil[i].writemask = 0;
+    }
+    dsa.alpha.enabled = 0;
+    dsa.alpha.func = PIPE_FUNC_ALWAYS;
+    dsa.alpha.ref_value = 0;
+    ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa);
+    ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
+}
+
+static struct pipe_video_context *
+r600_mpeg12_context_create(struct pipe_screen *screen,
+                           enum pipe_video_profile profile,
+                           enum pipe_video_chroma_format chroma_format,
+                           unsigned int width,
+                           unsigned int height)
+{
+    struct radeon_mpeg12_context *ctx;
+    ctx = CALLOC_STRUCT(radeon_mpeg12_context);
+    if (!ctx)
+        return NULL;
+
+    ctx->base.profile       = profile;
+    ctx->base.chroma_format = chroma_format;
+    ctx->base.width         = width;
+    ctx->base.height        = height;
+    ctx->base.screen        = screen;
+
+    ctx->base.destroy               = radeon_mpeg12_destroy;
+    ctx->base.decode_macroblocks    = radeon_mpeg12_decode_macroblocks;
+    ctx->base.clear_surface         = radeon_mpeg12_clear_surface;
+    ctx->base.render_picture        = radeon_mpeg12_render_picture;
+    ctx->base.set_decode_target     = radeon_mpeg12_set_decode_target;
+    ctx->base.set_csc_matrix        = radeon_mpeg12_set_csc_matrix;
+
+    ctx->pipe = r600_create_context(screen,(struct r600_winsys*)screen->winsys);
+    if (!ctx->pipe)
+    {
+        FREE(ctx);
+        return NULL;
+    }
+
+    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
+                                   width, height, chroma_format,
+                                   VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
+                                   VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
+                                   true))
+    {
+        ctx->pipe->destroy(ctx->pipe);
+        FREE(ctx);
+        return NULL;
+    }
+
+    if (!vl_compositor_init(&ctx->compositor, ctx->pipe))
+    {
+        vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+        ctx->pipe->destroy(ctx->pipe);
+        FREE(ctx);
+        return NULL;
+    }
+
+    if (!radeon_mpeg12_init_pipe_state(ctx))
+    {
+        vl_compositor_cleanup(&ctx->compositor);
+        vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+        ctx->pipe->destroy(ctx->pipe);
+        FREE(ctx);
+        return NULL;
+    }
+
+    return &ctx->base;
+}
+
+#endif
+
+struct pipe_video_context *
+r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height, void *priv)
+{
+   struct pipe_context *pipe;
+
+   assert(screen);
+
+   pipe = screen->context_create(screen, priv);
+   if (!pipe)
+      return NULL;
+
+   return sp_video_create_ex(pipe, profile, chroma_format, width, height,
+                             VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
+                             VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
+                             true,
+                             PIPE_FORMAT_VUYX);
+
+#if 0
+    struct pipe_video_context *vpipe;
+    struct radeon_vl_context *rvl_ctx;
+
+    assert(p_screen);
+    assert(width && height);
+
+    /* create radeon pipe_context */
+    switch(u_reduce_video_profile(profile))
+    {
+        case PIPE_VIDEO_CODEC_MPEG12:
+            vpipe = radeon_mpeg12_context_create(p_screen, profile, chr_f,
+                                                 width, height);
+            break;
+        default:
+            return NULL;
+    }
+
+    /* create radeon_vl_context */
+    rvl_ctx = calloc(1, sizeof(struct radeon_vl_context));
+    rvl_ctx->display = display;
+    rvl_ctx->screen = screen;
+
+    vpipe->priv = rvl_ctx;
+
+    return vpipe;
+#endif
+}
diff --git a/src/gallium/drivers/r600/r600_video_context.h b/src/gallium/drivers/r600/r600_video_context.h
new file mode 100644
index 00000000000..bda33a00d44
--- /dev/null
+++ b/src/gallium/drivers/r600/r600_video_context.h
@@ -0,0 +1,11 @@
+#ifndef __R600_VIDEO_CONTEXT_H__
+#define __R600_VIDEO_CONTEXT_H__
+
+#include <pipe/p_video_context.h>
+
+struct pipe_video_context *
+r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height, void *priv);
+
+#endif
diff --git a/src/gallium/targets/Makefile.xvmc b/src/gallium/targets/Makefile.xvmc
index e48906a1345..08529b38b70 100644
--- a/src/gallium/targets/Makefile.xvmc
+++ b/src/gallium/targets/Makefile.xvmc
@@ -7,6 +7,7 @@ XVMC_MINOR = 0
 INCLUDES = -I$(TOP)/src/gallium/include \
 	   -I$(TOP)/src/gallium/drivers \
 	   -I$(TOP)/src/gallium/auxiliary \
+	   -I$(TOP)/src/gallium/winsys \
 	   -I$(TOP)/src/gallium/winsys/g3dvl \
 	   $(DRIVER_INCLUDES)
 DEFINES = -DGALLIUM_TRACE $(DRIVER_DEFINES)
diff --git a/src/gallium/targets/xvmc-r600/Makefile b/src/gallium/targets/xvmc-r600/Makefile
new file mode 100644
index 00000000000..25aeb65059f
--- /dev/null
+++ b/src/gallium/targets/xvmc-r600/Makefile
@@ -0,0 +1,23 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+#LIBNAME =
+
+PIPE_DRIVERS = \
+        $(TOP)/src/gallium/drivers/r600/libr600.a \
+	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
+        $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+        $(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/auxiliary/libgallium.a
+
+C_SOURCES = \
+	target.c \
+	$(COMMON_GALLIUM_SOURCES) \
+	$(DRIVER_SOURCES)
+
+DRIVER_LIBS = $(shell pkg-config libdrm_radeon --libs) -lXfixes
+
+include ../Makefile.xvmc
+
+symlinks:
diff --git a/src/gallium/targets/xvmc-r600/target.c b/src/gallium/targets/xvmc-r600/target.c
new file mode 100644
index 00000000000..8753e2bab17
--- /dev/null
+++ b/src/gallium/targets/xvmc-r600/target.c
@@ -0,0 +1,24 @@
+#include "state_tracker/drm_driver.h"
+#include "target-helpers/inline_debug_helper.h"
+#include "r600/drm/r600_drm_public.h"
+#include "r600/r600_public.h"
+
+static struct pipe_screen *create_screen(int fd)
+{
+   struct radeon *radeon;
+   struct pipe_screen *screen;
+
+   radeon = r600_drm_winsys_create(fd);
+   if (!radeon)
+      return NULL;
+
+   screen = r600_screen_create(radeon);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen)
-- 
cgit v1.2.3


From b0dfc3f261b121bf8f39da6d09731ce7beb5f742 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 24 Oct 2010 00:53:49 +0200
Subject: Remove code copied over from r300

---
 src/gallium/drivers/r600/r600_video_context.c | 290 --------------------------
 1 file changed, 290 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_video_context.c b/src/gallium/drivers/r600/r600_video_context.c
index 9f3e0dc147c..2bbf622052b 100644
--- a/src/gallium/drivers/r600/r600_video_context.c
+++ b/src/gallium/drivers/r600/r600_video_context.c
@@ -1,268 +1,6 @@
-#include <X11/Xlib.h>
-#include <X11/Xutil.h>
-#include <pipe/p_defines.h>
-#include <pipe/p_context.h>
-#include <pipe/p_screen.h>
-#include <util/u_memory.h>
-#include <X11/Xlib.h>
-
-#include <fcntl.h>
-
-#include "softpipe/sp_texture.h"
-
 #include "r600_video_context.h"
 #include <softpipe/sp_video_context.h>
 
-#if 0
-
-static void r600_mpeg12_destroy(struct pipe_video_context *vpipe)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-
-    assert(vpipe);
-
-    ctx->pipe->bind_vs_state(ctx->pipe, NULL);
-    ctx->pipe->bind_fs_state(ctx->pipe, NULL);
-
-    ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend);
-    ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast);
-    ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
-
-    pipe_video_surface_reference(&ctx->decode_target, NULL);
-    vl_compositor_cleanup(&ctx->compositor);
-    vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-    ctx->pipe->destroy(ctx->pipe);
-
-    FREE(ctx);
-}
-
-static void
-r600_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
-                               struct pipe_video_surface *past,
-                               struct pipe_video_surface *future,
-                               unsigned num_macroblocks,
-                               struct pipe_macroblock *macroblocks,
-                               struct pipe_fence_handle **fence)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-    struct pipe_mpeg12_macroblock *mpeg12_macroblocks =
-                         (struct pipe_mpeg12_macroblock*)macroblocks;
-
-    assert(vpipe);
-    assert(num_macroblocks);
-    assert(macroblocks);
-    assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
-    assert(ctx->decode_target);
-
-    vl_mpeg12_mc_renderer_render_macroblocks(
-                            &ctx->mc_renderer,
-                            r600_video_surface(ctx->decode_target)->tex,
-                            past ? r600_video_surface(past)->tex : NULL,
-                            future ? r600_video_surface(future)->tex : NULL,
-                            num_macroblocks, mpeg12_macroblocks, fence);
-}
-
-static void r600_mpeg12_clear_surface(struct pipe_video_context *vpipe,
-                                      unsigned x, unsigned y,
-                                      unsigned width, unsigned height,
-                                      unsigned value,
-                                      struct pipe_surface *surface)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-
-    assert(vpipe);
-    assert(surface);
-
-    if (ctx->pipe->surface_fill)
-        ctx->pipe->surface_fill(ctx->pipe, surface, x, y, width, height, value);
-    else
-        util_surface_fill(ctx->pipe, surface, x, y, width, height, value);
-}
-
-static void
-r600_mpeg12_render_picture(struct pipe_video_context     *vpipe,
-                           struct pipe_video_surface     *src_surface,
-                           enum pipe_mpeg12_picture_type picture_type,
-                           struct pipe_video_rect        *src_area,
-                           struct pipe_surface           *dst_surface,
-                           struct pipe_video_rect        *dst_area,
-                           struct pipe_fence_handle      **fence)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-
-    assert(vpipe);
-    assert(src_surface);
-    assert(src_area);
-    assert(dst_surface);
-    assert(dst_area);
-
-    vl_compositor_render(&ctx->compositor,
-                         r600_video_surface(src_surface)->tex,
-                         picture_type, src_area, dst_surface->texture,
-                         dst_area, fence);
-}
-
-static void r600_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
-                                          struct pipe_video_surface *dt)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-
-    assert(vpipe);
-    assert(dt);
-
-    pipe_video_surface_reference(&ctx->decode_target, dt);
-}
-
-static void r600_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe,
-                                       const float *mat)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-
-    assert(vpipe);
-
-    vl_compositor_set_csc_matrix(&ctx->compositor, mat);
-}
-
-static bool r600_mpeg12_init_pipe_state(struct radeon_mpeg12_context *ctx)
-{
-    struct pipe_rasterizer_state rast;
-    struct pipe_blend_state blend;
-    struct pipe_depth_stencil_alpha_state dsa;
-    unsigned i;
-
-    assert(ctx);
-
-    rast.flatshade = 1;
-    rast.flatshade_first = 0;
-    rast.light_twoside = 0;
-    rast.front_winding = PIPE_WINDING_CCW;
-    rast.cull_mode = PIPE_WINDING_CW;
-    rast.fill_cw = PIPE_POLYGON_MODE_FILL;
-    rast.fill_ccw = PIPE_POLYGON_MODE_FILL;
-    rast.offset_cw = 0;
-    rast.offset_ccw = 0;
-    rast.scissor = 0;
-    rast.poly_smooth = 0;
-    rast.poly_stipple_enable = 0;
-    rast.point_sprite = 0;
-    rast.point_size_per_vertex = 0;
-    rast.multisample = 0;
-    rast.line_smooth = 0;
-    rast.line_stipple_enable = 0;
-    rast.line_stipple_factor = 0;
-    rast.line_stipple_pattern = 0;
-    rast.line_last_pixel = 0;
-    rast.bypass_vs_clip_and_viewport = 0;
-    rast.line_width = 1;
-    rast.point_smooth = 0;
-    rast.point_size = 1;
-    rast.offset_units = 1;
-    rast.offset_scale = 1;
-    /*rast.sprite_coord_mode[i] = ;*/
-    ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
-    ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
-
-    blend.blend_enable = 0;
-    blend.rgb_func = PIPE_BLEND_ADD;
-    blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
-    blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
-    blend.alpha_func = PIPE_BLEND_ADD;
-    blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
-    blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
-    blend.logicop_enable = 0;
-    blend.logicop_func = PIPE_LOGICOP_CLEAR;
-    /* Needed to allow color writes to FB, even if blending disabled */
-    blend.colormask = PIPE_MASK_RGBA;
-    blend.dither = 0;
-    ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
-    ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
-
-    dsa.depth.enabled = 0;
-    dsa.depth.writemask = 0;
-    dsa.depth.func = PIPE_FUNC_ALWAYS;
-    for (i = 0; i < 2; ++i)
-    {
-        dsa.stencil[i].enabled = 0;
-        dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
-        dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
-        dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
-        dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
-        dsa.stencil[i].ref_value = 0;
-        dsa.stencil[i].valuemask = 0;
-        dsa.stencil[i].writemask = 0;
-    }
-    dsa.alpha.enabled = 0;
-    dsa.alpha.func = PIPE_FUNC_ALWAYS;
-    dsa.alpha.ref_value = 0;
-    ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa);
-    ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
-}
-
-static struct pipe_video_context *
-r600_mpeg12_context_create(struct pipe_screen *screen,
-                           enum pipe_video_profile profile,
-                           enum pipe_video_chroma_format chroma_format,
-                           unsigned int width,
-                           unsigned int height)
-{
-    struct radeon_mpeg12_context *ctx;
-    ctx = CALLOC_STRUCT(radeon_mpeg12_context);
-    if (!ctx)
-        return NULL;
-
-    ctx->base.profile       = profile;
-    ctx->base.chroma_format = chroma_format;
-    ctx->base.width         = width;
-    ctx->base.height        = height;
-    ctx->base.screen        = screen;
-
-    ctx->base.destroy               = radeon_mpeg12_destroy;
-    ctx->base.decode_macroblocks    = radeon_mpeg12_decode_macroblocks;
-    ctx->base.clear_surface         = radeon_mpeg12_clear_surface;
-    ctx->base.render_picture        = radeon_mpeg12_render_picture;
-    ctx->base.set_decode_target     = radeon_mpeg12_set_decode_target;
-    ctx->base.set_csc_matrix        = radeon_mpeg12_set_csc_matrix;
-
-    ctx->pipe = r600_create_context(screen,(struct r600_winsys*)screen->winsys);
-    if (!ctx->pipe)
-    {
-        FREE(ctx);
-        return NULL;
-    }
-
-    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
-                                   width, height, chroma_format,
-                                   VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
-                                   VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
-                                   true))
-    {
-        ctx->pipe->destroy(ctx->pipe);
-        FREE(ctx);
-        return NULL;
-    }
-
-    if (!vl_compositor_init(&ctx->compositor, ctx->pipe))
-    {
-        vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-        ctx->pipe->destroy(ctx->pipe);
-        FREE(ctx);
-        return NULL;
-    }
-
-    if (!radeon_mpeg12_init_pipe_state(ctx))
-    {
-        vl_compositor_cleanup(&ctx->compositor);
-        vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-        ctx->pipe->destroy(ctx->pipe);
-        FREE(ctx);
-        return NULL;
-    }
-
-    return &ctx->base;
-}
-
-#endif
-
 struct pipe_video_context *
 r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                   enum pipe_video_chroma_format chroma_format,
@@ -281,32 +19,4 @@ r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                              VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
                              true,
                              PIPE_FORMAT_VUYX);
-
-#if 0
-    struct pipe_video_context *vpipe;
-    struct radeon_vl_context *rvl_ctx;
-
-    assert(p_screen);
-    assert(width && height);
-
-    /* create radeon pipe_context */
-    switch(u_reduce_video_profile(profile))
-    {
-        case PIPE_VIDEO_CODEC_MPEG12:
-            vpipe = radeon_mpeg12_context_create(p_screen, profile, chr_f,
-                                                 width, height);
-            break;
-        default:
-            return NULL;
-    }
-
-    /* create radeon_vl_context */
-    rvl_ctx = calloc(1, sizeof(struct radeon_vl_context));
-    rvl_ctx->display = display;
-    rvl_ctx->screen = screen;
-
-    vpipe->priv = rvl_ctx;
-
-    return vpipe;
-#endif
 }
-- 
cgit v1.2.3


From 501ac572c604ef248ed41311a065bc5f4746fcb3 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Sun, 24 Oct 2010 19:27:29 +0200
Subject: vl: va state-tracker configuration scripts

---
 configs/autoconf.in                                |  3 ++
 configure.ac                                       | 27 ++++++++++++++++--
 src/gallium/state_trackers/va/Makefile             |  2 +-
 src/gallium/state_trackers/va/ftab.c               |  2 +-
 src/gallium/state_trackers/va/htab.c               |  4 +++
 src/gallium/state_trackers/va/va_private.h         | 32 ++++++++++++++++++++++
 .../state_trackers/vdpau/mpeg2_bitstream_parser.h  |  9 ++----
 src/gallium/state_trackers/vdpau/vdpau_private.h   |  2 +-
 8 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/configs/autoconf.in b/configs/autoconf.in
index d7eb162b684..df52b3f42f6 100644
--- a/configs/autoconf.in
+++ b/configs/autoconf.in
@@ -160,6 +160,9 @@ EGL_DRIVER_INSTALL_DIR = @EGL_DRIVER_INSTALL_DIR@
 # VDPAU library install directory
 VDPAU_LIB_INSTALL_DIR=@VDPAU_LIB_INSTALL_DIR@
 
+# VA library install directory
+VA_LIB_INSTALL_DIR=@VA_LIB_INSTALL_DIR@
+
 # Xorg driver install directory (for xorg state-tracker)
 XORG_DRIVER_INSTALL_DIR = @XORG_DRIVER_INSTALL_DIR@
 
diff --git a/configure.ac b/configure.ac
index 0344be00703..59c2eb157b7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1345,6 +1345,13 @@ yes)
             fi
             HAVE_ST_VDPAU="yes"
             ;;
+	va)
+            # TODO: add a check for libva; this branch does not test for it.
+            if test "x$enable_gallium_g3dvl" != xyes; then
+                AC_MSG_ERROR([cannot build va state tracker without --enable-gallium-g3dvl])
+            fi
+            HAVE_ST_VA="yes"
+            ;;
         esac
 
 	if test -n "$tracker"; then
@@ -1479,7 +1486,7 @@ dnl
 dnl Gallium helper functions
 dnl
 gallium_check_st() {
-    if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_EGL" = xyes || test "x$HAVE_ST_XORG" = xyes || test "x$HAVE_ST_XVMC" = xyes || test "x$HAVE_ST_VDPAU" = xyes; then
+    if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_EGL" = xyes || test "x$HAVE_ST_XORG" = xyes || test "x$HAVE_ST_XVMC" = xyes || test "x$HAVE_ST_VDPAU" = xyes || test "x$HAVE_ST_VA" = xyes; then
          GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS $1"
     fi
     if test "x$HAVE_ST_DRI" = xyes && test "x$2" != x; then
@@ -1497,6 +1504,9 @@ gallium_check_st() {
     if test "x$HAVE_ST_VDPAU" = xyes && test "x$6" != x; then
          GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $6"
     fi
+    if test "x$HAVE_ST_VA" = xyes && test "x$7" != x; then
+         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $7"
+    fi
 }
 
 
@@ -1613,8 +1623,13 @@ AC_ARG_ENABLE([gallium-g3dvl],
 if test "x$enable_gallium_g3dvl" = xyes; then
     case "$mesa_driver" in
     xlib)
+	if test "x$HAVE_ST_VDPAU" = xyes; then
         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS vdpau-softpipe"
-        ;;
+	fi
+	if test "x$HAVE_ST_VA" = xyes; then
+	GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS va-softpipe"
+	fi
+	;;
     dri)
         GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS g3dvl/dri"
         ;;
@@ -1628,6 +1643,14 @@ AC_ARG_WITH([vdpau-libdir],
     [VDPAU_LIB_INSTALL_DIR='${libdir}/vdpau'])
 AC_SUBST([VDPAU_LIB_INSTALL_DIR])
 
+dnl Directory for VA libs
+AC_ARG_WITH([va-libdir],
+    [AS_HELP_STRING([--with-va-libdir=DIR],
+        [directory for the VA libraries @<:@default=${libdir}/va@:>@])],
+    [VA_LIB_INSTALL_DIR="$withval"],
+    [VA_LIB_INSTALL_DIR='${libdir}/va'])
+AC_SUBST([VA_LIB_INSTALL_DIR])
+
 dnl
 dnl Gallium swrast configuration
 dnl
diff --git a/src/gallium/state_trackers/va/Makefile b/src/gallium/state_trackers/va/Makefile
index 28fe5d09694..15c6ee0ef6d 100644
--- a/src/gallium/state_trackers/va/Makefile
+++ b/src/gallium/state_trackers/va/Makefile
@@ -8,7 +8,7 @@ VA_MINOR = 3
 LIBRARY_DEFINES = -DVER_MAJOR=$(VA_MAJOR) -DVER_MINOR=$(VA_MINOR) $(STATE_TRACKER_DEFINES)
 
 LIBRARY_INCLUDES = \
-	$(shell pkg-config --cflags-only-I vdpau) \
+	$(shell pkg-config --cflags-only-I va) \
 	-I$(TOP)/src/gallium/winsys/g3dvl
 
 C_SOURCES = htab.c \
diff --git a/src/gallium/state_trackers/va/ftab.c b/src/gallium/state_trackers/va/ftab.c
index 694390b3464..034424cdee7 100644
--- a/src/gallium/state_trackers/va/ftab.c
+++ b/src/gallium/state_trackers/va/ftab.c
@@ -26,7 +26,7 @@
  **************************************************************************/
 
 #include <assert.h>
-#include <VA/va_backend.h>
+#include <va/va_backend.h>
 
 const struct VADriverVTable vtable =
 {
diff --git a/src/gallium/state_trackers/va/htab.c b/src/gallium/state_trackers/va/htab.c
index 7b7c111a4be..069c7930927 100644
--- a/src/gallium/state_trackers/va/htab.c
+++ b/src/gallium/state_trackers/va/htab.c
@@ -29,6 +29,10 @@
 #include <os/os_thread.h>
 #include "va_private.h"
 
+#define VL_HANDLES
+
+typedef uint32_t vlHandle;
+
 #ifdef VL_HANDLES
 static struct handle_table *htab = NULL;
 pipe_static_mutex(htab_lock);
diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h
index e69de29bb2d..8264c259ed1 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -0,0 +1,32 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ 
+ #ifndef VA_PRIVATE_H
+ #define VA_PRIVATE_H
+ 
+ 
+ #endif // VA_PRIVATE_H
\ No newline at end of file
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
index 414d6597c6c..25f3516f821 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
@@ -36,19 +36,14 @@ enum vdpMPEG2States
 {
 	MPEG2_SEEK_HEADER,
 	MPEG2_HEADER_DONE,
-	MPEG2_BITSTREAM_DONE
-	MPEG2
+	MPEG2_BITSTREAM_DONE,
+	MPEG2_HEADER_START_CODE
 };
 
-enum vdpMPEG2Action
-{
-	MPEG2_
-};
 
 struct vdpMPEG2BitstreamParser
 {
 	enum vdpMPEG2States state;
-	enum vdpMPEG2Actions action;
 	uint32_t cur_bitstream;
 	const uint8_t *ptr_bitstream_end;
 	const uint8_t *ptr_bitstream;
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index d582b8e6c29..1deea3a67d3 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2010 Younes Manton.
+ * Copyright 2010 Younes Manton & Thomas Balling Sørensen.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
-- 
cgit v1.2.3


From 97a7cf230a70c64fff300931ae7c00aa00449c97 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 25 Oct 2010 19:59:31 +0200
Subject: Workaround for skipping vertex elements for r600g

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 1de17dd6239..977c8d67b55 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -278,7 +278,8 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
    for (i = 0; i < 4; ++i)
       vtex[i] = ureg_DECL_vs_input(shader, i + 1);
    /* Skip input 5 */
-   vtex[4] = ureg_DECL_vs_input(shader, 5);
+   ureg_DECL_vs_input(shader, 5);
+   vtex[4] = ureg_DECL_vs_input(shader, 6);
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
    for (i = 0; i < 5; ++i)
       o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
-- 
cgit v1.2.3


From aea4d004d2781ebb9cf437c9125ca232dd2d0aeb Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Mon, 25 Oct 2010 20:52:02 +0200
Subject: vl: more stub work for a va implementation

---
 src/gallium/state_trackers/va/Makefile        |  6 +-
 src/gallium/state_trackers/va/ftab.c          | 99 ++++++++++++++-------------
 src/gallium/state_trackers/va/va_context.c    | 57 +++++++++++++++
 src/gallium/state_trackers/va/va_image.c      | 41 +++++++++++
 src/gallium/state_trackers/va/va_private.h    | 16 ++++-
 src/gallium/state_trackers/va/va_subpicture.c | 40 +++++++++++
 6 files changed, 209 insertions(+), 50 deletions(-)
 create mode 100644 src/gallium/state_trackers/va/va_context.c
 create mode 100644 src/gallium/state_trackers/va/va_image.c
 create mode 100644 src/gallium/state_trackers/va/va_subpicture.c

diff --git a/src/gallium/state_trackers/va/Makefile b/src/gallium/state_trackers/va/Makefile
index 15c6ee0ef6d..1d6e303a4f1 100644
--- a/src/gallium/state_trackers/va/Makefile
+++ b/src/gallium/state_trackers/va/Makefile
@@ -12,7 +12,11 @@ LIBRARY_INCLUDES = \
 	-I$(TOP)/src/gallium/winsys/g3dvl
 
 C_SOURCES = htab.c \
-	    ftab.c 
+	    ftab.c \
+	    va_context.c \
+	    va_image.c \
+	    va_subpicture.c
+	    
 
 
 include ../../Makefile.template
diff --git a/src/gallium/state_trackers/va/ftab.c b/src/gallium/state_trackers/va/ftab.c
index 034424cdee7..4b9dc576511 100644
--- a/src/gallium/state_trackers/va/ftab.c
+++ b/src/gallium/state_trackers/va/ftab.c
@@ -26,35 +26,34 @@
  **************************************************************************/
 
 #include <assert.h>
+#include <va/va.h>
 #include <va/va_backend.h>
+#include "va_private.h"
 
-const struct VADriverVTable vtable =
+static struct VADriverVTable vtable =
 {
-   0, /* VAStatus (*vaTerminate) ( VADriverContextP ctx ); */
-   0, /* VAStatus (*vaQueryConfigProfiles) ( VADriverContextP ctx, VAProfile *profile_list,int *num_profiles); */
-   0, /* VAStatus (*vaQueryConfigEntrypoints) ( VADriverContextP ctx,	VAProfile profile, VAEntrypoint  *entrypoint_list, int *num_entrypoints	); */
-   0, /* VAStatus (*vaGetConfigAttributes) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs ); */
-   0, /* VAStatus (*vaCreateConfig) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint,	VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id); */
-   0, /* VAStatus (*vaDestroyConfig) ( VADriverContextP ctx, VAConfigID config_id); */
-   0, /* VAStatus (*vaQueryConfigAttributes) ( VADriverContextP ctx, VAConfigID config_id, VAProfile *profile, VAEntrypoint *entrypoint, VAConfigAttrib *attrib_list, int *num_attribs); */
-   0, /* VAStatus (*vaCreateConfig) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint,	VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id); */
-   0, /* VAStatus (*vaDestroyConfig) ( VADriverContextP ctx, VAConfigID config_id ); */
-   0, /* VAStatus (*vaQueryConfigAttributes) (VADriverContextP ctx,VAConfigID config_id,VAProfile *profile,VAEntrypoint *entrypoint,VAConfigAttrib *attrib_list,int *num_attribs); */
-   0, /* VAStatus (*vaCreateSurfaces) ( VADriverContextP ctx,int width,int height,int format,int num_surfaces,VASurfaceID *surfaces); */
-   0, /* VAStatus (*vaDestroySurfaces) ( VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces ); */
-   0, /* VAStatus (*vaCreateContext) (VADriverContextP ctx,VAConfigID config_id,int picture_width,int picture_height,int flag,VASurfaceID *render_targets,int num_render_targets,VAContextID *context); */
-   0, /* VAStatus (*vaDestroyContext) (VADriverContextP ctx,VAContextID context); */
-   0, /* VAStatus (*vaCreateBuffer) (VADriverContextP ctx,VAContextID context,VABufferType type,unsigned int size,unsigned int num_elements,void *data,VABufferID *buf_id); */
-   0, /* VAStatus (*vaBufferSetNumElements) (VADriverContextP ctx,VABufferID buf_id,unsigned int num_elements); */
-   0, /* VAStatus (*vaMapBuffer) (VADriverContextP ctx,VABufferID buf_id,void **pbuf); */
-   0, /* VAStatus (*vaUnmapBuffer) (VADriverContextP ctx,VABufferID buf_id); */
-   0, /* VAStatus (*vaDestroyBuffer) (VADriverContextP ctx,VABufferID buffer_id); */
-   0, /* VAStatus (*vaBeginPicture) (VADriverContextP ctx,VAContextID context,VASurfaceID render_target); */
-   0, /* VAStatus (*vaRenderPicture) (VADriverContextP ctx,VAContextID context,VABufferID *buffers,int num_buffers); */
-   0, /* VAStatus (*vaEndPicture) (VADriverContextP ctx,VAContextID context); */
-   0, /* VAStatus (*vaSyncSurface) (VADriverContextP ctx,VASurfaceID render_target); */
-   0, /* VAStatus (*vaQuerySurfaceStatus) (VADriverContextP ctx,VASurfaceID render_target,VASurfaceStatus *status); */
-   0, /* VAStatus (*vaPutSurface) (
+   0x1, /* VAStatus (*vaTerminate) ( VADriverContextP ctx ); */
+   0x2, /* VAStatus (*vaQueryConfigProfiles) ( VADriverContextP ctx, VAProfile *profile_list,int *num_profiles); */
+   0x3, /* VAStatus (*vaQueryConfigEntrypoints) ( VADriverContextP ctx,	VAProfile profile, VAEntrypoint  *entrypoint_list, int *num_entrypoints	); */
+   0x4, /* VAStatus (*vaGetConfigAttributes) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs ); */
+   0x5, /* VAStatus (*vaCreateConfig) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint,	VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id); */
+   0x6, /* VAStatus (*vaDestroyConfig) ( VADriverContextP ctx, VAConfigID config_id); */
+   0x7, /* VAStatus (*vaQueryConfigAttributes) ( VADriverContextP ctx, VAConfigID config_id, VAProfile *profile, VAEntrypoint *entrypoint, VAConfigAttrib *attrib_list, int *num_attribs); */
+   0x8, /* VAStatus (*vaCreateSurfaces) ( VADriverContextP ctx,int width,int height,int format,int num_surfaces,VASurfaceID *surfaces); */
+   0x9, /* VAStatus (*vaDestroySurfaces) ( VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces ); */
+   0x10, /* VAStatus (*vaCreateContext) (VADriverContextP ctx,VAConfigID config_id,int picture_width,int picture_height,int flag,VASurfaceID *render_targets,int num_render_targets,VAContextID *context); */
+   0x11, /* VAStatus (*vaDestroyContext) (VADriverContextP ctx,VAContextID context); */
+   0x12, /* VAStatus (*vaCreateBuffer) (VADriverContextP ctx,VAContextID context,VABufferType type,unsigned int size,unsigned int num_elements,void *data,VABufferID *buf_id); */
+   0x13, /* VAStatus (*vaBufferSetNumElements) (VADriverContextP ctx,VABufferID buf_id,unsigned int num_elements); */
+   0x14, /* VAStatus (*vaMapBuffer) (VADriverContextP ctx,VABufferID buf_id,void **pbuf); */
+   0x15, /* VAStatus (*vaUnmapBuffer) (VADriverContextP ctx,VABufferID buf_id); */
+   0x16, /* VAStatus (*vaDestroyBuffer) (VADriverContextP ctx,VABufferID buffer_id); */
+   0x17, /* VAStatus (*vaBeginPicture) (VADriverContextP ctx,VAContextID context,VASurfaceID render_target); */
+   0x18, /* VAStatus (*vaRenderPicture) (VADriverContextP ctx,VAContextID context,VABufferID *buffers,int num_buffers); */
+   0x19, /* VAStatus (*vaEndPicture) (VADriverContextP ctx,VAContextID context); */
+   0x20, /* VAStatus (*vaSyncSurface) (VADriverContextP ctx,VASurfaceID render_target); */
+   0x21, /* VAStatus (*vaQuerySurfaceStatus) (VADriverContextP ctx,VASurfaceID render_target,VASurfaceStatus *status); */
+   0x22, /* VAStatus (*vaPutSurface) (
     		VADriverContextP ctx,
 		VASurfaceID surface,
 		void* draw,
@@ -69,13 +68,13 @@ const struct VADriverVTable vtable =
 		VARectangle *cliprects, 
 		unsigned int number_cliprects, 
 		unsigned int flags); */
-   0, /* VAStatus (*vaQueryImageFormats) ( VADriverContextP ctx, VAImageFormat *format_list,int *num_formats); */
-   0, /* VAStatus (*vaCreateImage) (VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image); */
-   0, /* VAStatus (*vaDeriveImage) (VADriverContextP ctx,VASurfaceID surface,VAImage *image); */
-   0, /* VAStatus (*vaDestroyImage) (VADriverContextP ctx,VAImageID image); */
-   0, /* VAStatus (*vaSetImagePalette) (VADriverContextP ctx,VAImageID image, unsigned char *palette); */
-   0, /* VAStatus (*vaGetImage) (VADriverContextP ctx,VASurfaceID surface,int x,int y,unsigned int width,unsigned int height,VAImageID image); */
-   0, /* VAStatus (*vaPutImage) (
+   &vlVaQueryImageFormats, /* VAStatus (*vaQueryImageFormats) ( VADriverContextP ctx, VAImageFormat *format_list,int *num_formats); */
+   0x24, /* VAStatus (*vaCreateImage) (VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image); */
+   0x25, /* VAStatus (*vaDeriveImage) (VADriverContextP ctx,VASurfaceID surface,VAImage *image); */
+   0x26, /* VAStatus (*vaDestroyImage) (VADriverContextP ctx,VAImageID image); */
+   0x27, /* VAStatus (*vaSetImagePalette) (VADriverContextP ctx,VAImageID image, unsigned char *palette); */
+   0x28, /* VAStatus (*vaGetImage) (VADriverContextP ctx,VASurfaceID surface,int x,int y,unsigned int width,unsigned int height,VAImageID image); */
+   0x29, /* VAStatus (*vaPutImage) (
 		VADriverContextP ctx,
 		VASurfaceID surface,
 		VAImageID image,
@@ -88,13 +87,13 @@ const struct VADriverVTable vtable =
 		unsigned int dest_width,
 		unsigned int dest_height
 	); */
-   0,	/* VAStatus (*vaQuerySubpictureFormats) (VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats); */
-   0, /* VAStatus (*vaCreateSubpicture) (VADriverContextP ctx,VAImageID image,VASubpictureID *subpicture); */
-   0, /* VAStatus (*vaDestroySubpicture) (VADriverContextP ctx,VASubpictureID subpicture); */
-   0, /* VAStatus (*vaSetSubpictureImage) (VADriverContextP ctx,VASubpictureID subpicture,VAImageID image); */
-   0, /* VAStatus (*vaSetSubpictureChromakey) (VADriverContextP ctx,VASubpictureID subpicture,unsigned int chromakey_min,unsigned int chromakey_max,unsigned int chromakey_mask); */
-   0, /* VAStatus (*vaSetSubpictureGlobalAlpha) (VADriverContextP ctx,VASubpictureID subpicture,float global_alpha); */
-   0, /* VAStatus (*vaAssociateSubpicture) (
+   &vlVaQuerySubpictureFormats,	/* VAStatus (*vaQuerySubpictureFormats) (VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats); */
+   0x31, /* VAStatus (*vaCreateSubpicture) (VADriverContextP ctx,VAImageID image,VASubpictureID *subpicture); */
+   0x32, /* VAStatus (*vaDestroySubpicture) (VADriverContextP ctx,VASubpictureID subpicture); */
+   0x33, /* VAStatus (*vaSetSubpictureImage) (VADriverContextP ctx,VASubpictureID subpicture,VAImageID image); */
+   0x34, /* VAStatus (*vaSetSubpictureChromakey) (VADriverContextP ctx,VASubpictureID subpicture,unsigned int chromakey_min,unsigned int chromakey_max,unsigned int chromakey_mask); */
+   0x35, /* VAStatus (*vaSetSubpictureGlobalAlpha) (VADriverContextP ctx,VASubpictureID subpicture,float global_alpha); */
+   0x36, /* VAStatus (*vaAssociateSubpicture) (
 		VADriverContextP ctx,
 		VASubpictureID subpicture,
 		VASurfaceID *target_surfaces,
@@ -108,12 +107,12 @@ const struct VADriverVTable vtable =
 		unsigned short dest_width,
 		unsigned short dest_height,
 		unsigned int flags); */
-   0, /* VAStatus (*vaDeassociateSubpicture) (VADriverContextP ctx,VASubpictureID subpicture,VASurfaceID *target_surfaces,int num_surfaces); */
-   0, /* VAStatus (*vaQueryDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int *num_attributes); */
-   0, /* VAStatus (*vaGetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
-   0, /* VAStatus (*vaSetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
-   0, /* VAStatus (*vaBufferInfo) (VADriverContextP ctx,VAContextID context,VABufferID buf_id,VABufferType *type,unsigned int *size,unsigned int *num_elements); */
-   0, /* VAStatus (*vaLockSurface) (
+   0x37, /* VAStatus (*vaDeassociateSubpicture) (VADriverContextP ctx,VASubpictureID subpicture,VASurfaceID *target_surfaces,int num_surfaces); */
+   0x38, /* VAStatus (*vaQueryDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int *num_attributes); */
+   0x39, /* VAStatus (*vaGetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
+   0x40, /* VAStatus (*vaSetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
+   0x41, /* VAStatus (*vaBufferInfo) (VADriverContextP ctx,VAContextID context,VABufferID buf_id,VABufferType *type,unsigned int *size,unsigned int *num_elements); */
+   0x42, /* VAStatus (*vaLockSurface) (
 		VADriverContextP ctx,
                 VASurfaceID surface,
                 unsigned int *fourcc,
@@ -125,7 +124,11 @@ const struct VADriverVTable vtable =
                 unsigned int *chroma_v_offset,
                 unsigned int *buffer_name,
                 void **buffer); */
-   0, /* VAStatus (*vaUnlockSurface) (VADriverContextP ctx,VASurfaceID surface); */
-   0 /* struct VADriverVTableGLX *glx; "Optional" */
+   0x43, /* VAStatus (*vaUnlockSurface) (VADriverContextP ctx,VASurfaceID surface); */
+   0x44 /* struct VADriverVTableGLX *glx; "Optional" */
 };
 
+struct VADriverVTable vlVaGetVtable()
+{
+	return vtable;
+}
\ No newline at end of file
diff --git a/src/gallium/state_trackers/va/va_context.c b/src/gallium/state_trackers/va/va_context.c
new file mode 100644
index 00000000000..0b8d7865f73
--- /dev/null
+++ b/src/gallium/state_trackers/va/va_context.c
@@ -0,0 +1,57 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ 
+#include <pipe/p_compiler.h>
+#include <pipe/p_video_context.h>
+#include <util/u_debug.h>
+#include <va/va.h>
+#include <va/va_backend.h>
+#include "va_private.h"
+
+//struct VADriverVTable vlVaGetVtable();
+
+PUBLIC
+VAStatus __vaDriverInit_0_31 (VADriverContextP ctx)
+{
+	if (!ctx)
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+	
+	ctx->str_vendor = "mesa gallium vaapi";
+	ctx->vtable = vlVaGetVtable();
+	ctx->max_attributes = 1;
+	ctx->max_display_attributes = 1;
+	ctx->max_entrypoints = 1;
+	ctx->max_image_formats = 1;
+	ctx->max_profiles = 1;
+	ctx->max_subpic_formats = 1;
+	ctx->version_major = 3;
+	ctx->version_minor = 1;
+	
+	VA_INFO("vl_screen_pointer %p\n",ctx->native_dpy);
+
+	return VA_STATUS_SUCCESS;
+}
\ No newline at end of file
diff --git a/src/gallium/state_trackers/va/va_image.c b/src/gallium/state_trackers/va/va_image.c
new file mode 100644
index 00000000000..05b3ffcf403
--- /dev/null
+++ b/src/gallium/state_trackers/va/va_image.c
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ 
+ #include <util/u_memory.h>
+ #include <util/u_format.h>
+ #include <va/va.h>
+ #include <va/va_backend.h>
+ #include "va_private.h"
+ 
+ VAStatus
+ vlVaQueryImageFormats ( 	VADriverContextP ctx, 
+							VAImageFormat *format_list,
+							int *num_formats)
+{
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
\ No newline at end of file
diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h
index 8264c259ed1..9688097098a 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -28,5 +28,19 @@
  #ifndef VA_PRIVATE_H
  #define VA_PRIVATE_H
  
+ #include <va/va.h>
+ #include <va/va_backend.h>
+ #define VA_DEBUG(_str,...) debug_printf("[Gallium VA backend]: " _str,__VA_ARGS__)
+ #define VA_INFO(_str,...) VA_DEBUG("INFO: " _str,__VA_ARGS__)
+ #define VA_WARNING(_str,...) VA_DEBUG("WARNING: " _str,__VA_ARGS__)
+ #define VA_ERROR(_str,...) VA_DEBUG("ERROR: " _str,__VA_ARGS__)
+
+// Public functions:
+VAStatus __vaDriverInit_0_31 (VADriverContextP ctx);
+
+// Private functions:
+struct VADriverVTable vlVaGetVtable();
+VAStatus vlVaQueryImageFormats (VADriverContextP ctx,VAImageFormat *format_list,int *num_formats);
+VAStatus vlVaQuerySubpictureFormats(VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats);
  
- #endif // VA_PRIVATE_H
\ No newline at end of file
+ #endif // VA_PRIVATE_H
diff --git a/src/gallium/state_trackers/va/va_subpicture.c b/src/gallium/state_trackers/va/va_subpicture.c
new file mode 100644
index 00000000000..211970913fa
--- /dev/null
+++ b/src/gallium/state_trackers/va/va_subpicture.c
@@ -0,0 +1,40 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+ 
+#include <va/va.h>
+#include <va/va_backend.h>
+#include "va_private.h"
+ 
+VAStatus 
+vlVaQuerySubpictureFormats(	VADriverContextP ctx,
+							VAImageFormat *format_list,
+							unsigned int *flags,
+							unsigned int *num_formats)
+{
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
\ No newline at end of file
-- 
cgit v1.2.3


From 1dccc4cfaa423f15ab582d2a0253a84a0ae0b9fa Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Mon, 25 Oct 2010 21:38:08 +0200
Subject: vl: added stub for vlVaCreateImage

---
 src/gallium/state_trackers/va/ftab.c       |  2 +-
 src/gallium/state_trackers/va/va_image.c   | 15 +++++++++++++++
 src/gallium/state_trackers/va/va_private.h |  1 +
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/va/ftab.c b/src/gallium/state_trackers/va/ftab.c
index 4b9dc576511..651b7660964 100644
--- a/src/gallium/state_trackers/va/ftab.c
+++ b/src/gallium/state_trackers/va/ftab.c
@@ -69,7 +69,7 @@ static struct VADriverVTable vtable =
 		unsigned int number_cliprects, 
 		unsigned int flags); */
    &vlVaQueryImageFormats, /* VAStatus (*vaQueryImageFormats) ( VADriverContextP ctx, VAImageFormat *format_list,int *num_formats); */
-   0x24, /* VAStatus (*vaCreateImage) (VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image); */
+   &vlVaCreateImage, /* VAStatus (*vaCreateImage) (VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image); */
    0x25, /* VAStatus (*vaDeriveImage) (VADriverContextP ctx,VASurfaceID surface,VAImage *image); */
    0x26, /* VAStatus (*vaDestroyImage) (VADriverContextP ctx,VAImageID image); */
    0x27, /* VAStatus (*vaSetImagePalette) (VADriverContextP ctx,VAImageID image, unsigned char *palette); */
diff --git a/src/gallium/state_trackers/va/va_image.c b/src/gallium/state_trackers/va/va_image.c
index 05b3ffcf403..b7e1320a4e8 100644
--- a/src/gallium/state_trackers/va/va_image.c
+++ b/src/gallium/state_trackers/va/va_image.c
@@ -36,6 +36,21 @@
 							VAImageFormat *format_list,
 							int *num_formats)
 {
+	if (!ctx)
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+	
 
+	return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaCreateImage(	VADriverContextP ctx,
+							VAImageFormat *format,
+							int width,
+							int height,
+							VAImage *image)
+{
+	if (!ctx)
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+	
 	return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
\ No newline at end of file
diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h
index 9688097098a..ccaa5c053ea 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -42,5 +42,6 @@ VAStatus __vaDriverInit_0_31 (VADriverContextP ctx);
 struct VADriverVTable vlVaGetVtable();
 VAStatus vlVaQueryImageFormats (VADriverContextP ctx,VAImageFormat *format_list,int *num_formats);
 VAStatus vlVaQuerySubpictureFormats(VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats);
+VAStatus vlVaCreateImage(VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image);
  
  #endif // VA_PRIVATE_H
-- 
cgit v1.2.3


From 6ac1bbe21a978e326e6361426343b61d10140aa3 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Tue, 26 Oct 2010 13:44:19 +0200
Subject: vl: pipe-video branch merged with König's pipe-video branch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 configure.ac                                    | 25 +++--------------
 src/gallium/drivers/softpipe/sp_video_context.c | 37 +++++--------------------
 src/gallium/include/pipe/p_defines.h            | 10 -------
 src/gallium/include/pipe/p_format.h             |  6 ++--
 src/gallium/include/pipe/p_screen.h             |  8 ------
 5 files changed, 14 insertions(+), 72 deletions(-)

diff --git a/configure.ac b/configure.ac
index c9daffbb3ae..5edd935920d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1486,11 +1486,7 @@ dnl
 dnl Gallium helper functions
 dnl
 gallium_check_st() {
-<<<<<<< HEAD
-    if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_EGL" = xyes || test "x$HAVE_ST_XORG" = xyes || test "x$HAVE_ST_XVMC" = xyes || test "x$HAVE_ST_VDPAU" = xyes || test "x$HAVE_ST_VA" = xyes; then
-=======
-    if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_XORG" = xyes || test "x$HAVE_ST_XVMC" = xyes; then
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
+    if test "x$HAVE_ST_DRI" = xyes || test "x$HAVE_ST_XORG" = xyes || test "x$HAVE_ST_XVMC" = xyes || test "x$HAVE_ST_VDPAU" = xyes || test "x$HAVE_ST_VA" = xyes; then
          GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS $1"
     fi
     if test "x$HAVE_ST_DRI" = xyes && test "x$2" != x; then
@@ -1499,24 +1495,15 @@ gallium_check_st() {
     if test "x$HAVE_ST_XORG" = xyes && test "x$3" != x; then
          GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $3"
     fi
-<<<<<<< HEAD
-    if test "x$HAVE_ST_XORG" = xyes && test "x$4" != x; then
+    if test "x$HAVE_ST_XVMC" = xyes && test "x$4" != x; then
          GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $4"
     fi
-    if test "x$HAVE_ST_XVMC" = xyes && test "x$5" != x; then
+    if test "x$HAVE_ST_VDPAU" = xyes && test "x$5" != x; then
          GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $5"
     fi
-    if test "x$HAVE_ST_VDPAU" = xyes && test "x$6" != x; then
+    if test "x$HAVE_ST_VA" = xyes && test "x$6" != x; then
          GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $6"
     fi
-    if test "x$HAVE_ST_VA" = xyes && test "x$7" != x; then
-         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $7"
-    fi
-=======
-    if test "x$HAVE_ST_XVMC" = xyes && test "x$5" != x; then
-         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $5"
-    fi
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
 }
 
 
@@ -1619,11 +1606,7 @@ AC_ARG_ENABLE([gallium-nouveau],
     [enable_gallium_nouveau=no])
 if test "x$enable_gallium_nouveau" = xyes; then
     GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50"
-<<<<<<< HEAD
-    gallium_check_st "nouveau/drm" "dri-nouveau" "egl-nouveau" "xorg-nouveau" "xvmc-nouveau"
-=======
     gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" "xvmc-nouveau"
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
 fi
 
 dnl
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index f39c46e596c..9acf7171e5a 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -98,13 +98,9 @@ sp_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
    if (geom & PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)
       return FALSE;
 
-<<<<<<< HEAD
-   return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, format, PIPE_TEXTURE_2D, 1,
-                                                  usage, geom);
-=======
+
    return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, format, PIPE_TEXTURE_2D,
                                                  0, usage, geom);
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
 }
 
 static void
@@ -134,7 +130,6 @@ static void
 sp_mpeg12_clear_render_target(struct pipe_video_context *vpipe,
                        struct pipe_surface *dst,
                        unsigned dstx, unsigned dsty,
-					   const float *rgba,
                        unsigned width, unsigned height)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
@@ -151,12 +146,10 @@ sp_mpeg12_clear_render_target(struct pipe_video_context *vpipe,
 
 static void
 sp_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
-                       struct pipe_resource *dst,
-					   struct pipe_subresource subdst,
-                       unsigned dstx, unsigned dsty, unsigned dstz,
-                       struct pipe_resource *src,
-					   struct pipe_subresource subsrc,
-                       unsigned srcx, unsigned srcy, unsigned srcz,
+                       struct pipe_surface *dst,
+                       unsigned dstx, unsigned dsty,
+                       struct pipe_surface *src,
+                       unsigned srcx, unsigned srcy,
                        unsigned width, unsigned height)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
@@ -164,13 +157,7 @@ sp_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
    assert(vpipe);
    assert(dst);
 
-<<<<<<< HEAD
-   if (ctx->pipe->resource_copy_region)
-      ctx->pipe->resource_copy_region(ctx->pipe, dst, subdst, dstx, dsty, dstz, src, subsrc, srcx, srcy, srcz, width, height);
-   else
-      util_resource_copy_region(ctx->pipe, dst, subdst, dstx, dsty, dstz, src, subsrc, srcx, srcy, srcz, width, height);
-=======
-   struct pipe_subresource subdst, subsrc;
+   struct pipe_subresource subdst,subsrc;
    subdst.face = dst->face;
    subdst.level = dst->level;
    subsrc.face = src->face;
@@ -184,7 +171,6 @@ sp_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
       util_resource_copy_region(ctx->pipe, dst->texture, subdst, dstx, dsty, dst->zslice,
                                 src->texture, subsrc, srcx, srcy, src->zslice,
                                 width, height);
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
 }
 
 static struct pipe_transfer*
@@ -366,18 +352,12 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    rast.flatshade = 1;
    rast.flatshade_first = 0;
    rast.light_twoside = 0;
-<<<<<<< HEAD
-   rast.cull_face = PIPE_FACE_FRONT;
-   rast.fill_front = PIPE_POLYGON_MODE_FILL;
-   rast.fill_back = PIPE_POLYGON_MODE_FILL;
-=======
    rast.front_ccw = 1;
    rast.cull_face = PIPE_FACE_NONE;
    rast.fill_back = PIPE_POLYGON_MODE_FILL;
    rast.fill_front = PIPE_POLYGON_MODE_FILL;
    rast.offset_point = 0;
    rast.offset_line = 0;
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
    rast.scissor = 0;
    rast.poly_smooth = 0;
    rast.poly_stipple_enable = 0;
@@ -400,11 +380,8 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
    ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
 
-<<<<<<< HEAD
-
-=======
    memset(&blend, 0, sizeof blend);
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
+
    blend.independent_blend_enable = 0;
    blend.rt[0].blend_enable = 0;
    blend.rt[0].rgb_func = PIPE_BLEND_ADD;
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index f8eeebf6a71..8bd509c88c2 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -494,10 +494,6 @@ enum pipe_shader_cap
 #define PIPE_REFERENCED_FOR_READ  (1 << 0)
 #define PIPE_REFERENCED_FOR_WRITE (1 << 1)
 
-<<<<<<< HEAD
-
-=======
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
 enum pipe_video_codec
 {
    PIPE_VIDEO_CODEC_UNKNOWN = 0,
@@ -523,10 +519,7 @@ enum pipe_video_profile
    PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
 };
 
-<<<<<<< HEAD
 
-=======
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
 /**
  * Composite query types
  */
@@ -540,10 +533,7 @@ struct pipe_query_data_timestamp_disjoint
    uint64_t frequency;
    boolean  disjoint;
 };
-<<<<<<< HEAD
 
-=======
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
 
 #ifdef __cplusplus
 }
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 9a59c9f9ea0..119d304d927 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -199,9 +199,9 @@ enum pipe_format {
    PIPE_FORMAT_AI44                  = 142,
 
    /* some stencil samplers formats */
-   PIPE_FORMAT_X24S8_USCALED           = 136,
-   PIPE_FORMAT_S8X24_USCALED           = 137,
-   PIPE_FORMAT_X32_S8X24_USCALED       = 138,
+   PIPE_FORMAT_X24S8_USCALED           = 143,
+   PIPE_FORMAT_S8X24_USCALED           = 144,
+   PIPE_FORMAT_X32_S8X24_USCALED       = 145,
    PIPE_FORMAT_COUNT
 };
 
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 0303c5b2ea9..75eeaeba1f7 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -92,21 +92,13 @@ struct pipe_screen {
     */
    int (*get_shader_param)( struct pipe_screen *, unsigned shader, enum pipe_shader_cap param );
 
-<<<<<<< HEAD
    struct pipe_context * (*context_create)( struct pipe_screen *, void *priv );
-=======
-   struct pipe_context * (*context_create)( struct pipe_screen *,
-                                            void *priv );
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
 
    struct pipe_video_context * (*video_context_create)( struct pipe_screen *screen,
                                                         enum pipe_video_profile profile,
                                                         enum pipe_video_chroma_format chroma_format,
                                                         unsigned width, unsigned height, void *priv );
-<<<<<<< HEAD
 
-=======
->>>>>>> 97a7cf230a70c64fff300931ae7c00aa00449c97
 
    /**
     * Check if the given pipe_format is supported as a texture or
-- 
cgit v1.2.3


From 050dfe9caf364fdaac91db1313988275774a7eaa Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Tue, 26 Oct 2010 13:58:19 +0200
Subject: vl: fix some build issues after the merge

---
 src/gallium/targets/Makefile.xvmc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/gallium/targets/Makefile.xvmc b/src/gallium/targets/Makefile.xvmc
index 08529b38b70..d5d33bde42e 100644
--- a/src/gallium/targets/Makefile.xvmc
+++ b/src/gallium/targets/Makefile.xvmc
@@ -2,6 +2,7 @@
 
 LIBBASENAME = XvMCg3dvl
 LIBNAME = lib$(LIBBASENAME).so
+LIB_GLOB=lib$(LIBBASENAME).*so*
 XVMC_MAJOR = 1
 XVMC_MINOR = 0
 INCLUDES = -I$(TOP)/src/gallium/include \
@@ -11,7 +12,7 @@ INCLUDES = -I$(TOP)/src/gallium/include \
 	   -I$(TOP)/src/gallium/winsys/g3dvl \
 	   $(DRIVER_INCLUDES)
 DEFINES = -DGALLIUM_TRACE $(DRIVER_DEFINES)
-LIBS = $(EXTRA_LIB_PATH) $(DRIVER_LIBS) -lXvMC -lXv -lX11 -lm
+LIBS = $(EXTRA_LIB_PATH) $(DRIVER_LIBS) -lXv -lX11 -lm
 STATE_TRACKER_LIB = $(TOP)/src/gallium/state_trackers/xorg/xvmc/libxvmctracker.a
 
 # XXX: Hack, XvMC public funcs aren't exported if we link to libxvmctracker.a :(
@@ -55,8 +56,8 @@ clean:
 	-rm -f *.o *~ *.so $(SYMLINKS)
 	-rm -f depend depend.bak
 
-#install: $(LIBNAME)
-#	$(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
-#	$(MINSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+install: default
+	$(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+	$(MINSTALL) -m 755 $(TOP)/$(LIB_DIR)/gallium/$(LIB_GLOB) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
 
 include depend
-- 
cgit v1.2.3


From 17ea7d16bd3477361d32091f445beca625703f63 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Tue, 26 Oct 2010 14:06:01 +0200
Subject: vl: creating cleaner way of naming libraries

---
 src/gallium/targets/Makefile.xvmc          | 5 ++---
 src/gallium/targets/xvmc-nouveau/Makefile  | 2 +-
 src/gallium/targets/xvmc-r600/Makefile     | 2 +-
 src/gallium/targets/xvmc-softpipe/Makefile | 2 ++
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/targets/Makefile.xvmc b/src/gallium/targets/Makefile.xvmc
index d5d33bde42e..6abe7f6b062 100644
--- a/src/gallium/targets/Makefile.xvmc
+++ b/src/gallium/targets/Makefile.xvmc
@@ -1,6 +1,5 @@
 # This makefile template is used to build libXvMCg3dvl.so
 
-LIBBASENAME = XvMCg3dvl
 LIBNAME = lib$(LIBBASENAME).so
 LIB_GLOB=lib$(LIBBASENAME).*so*
 XVMC_MAJOR = 1
@@ -57,7 +56,7 @@ clean:
 	-rm -f depend depend.bak
 
 install: default
-	$(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
-	$(MINSTALL) -m 755 $(TOP)/$(LIB_DIR)/gallium/$(LIB_GLOB) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR)
+	$(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR)
+	$(MINSTALL) -m 755 $(TOP)/$(LIB_DIR)/gallium/$(LIB_GLOB) $(DESTDIR)$(INSTALL_DIR)/$(LIB_DIR)
 
 include depend
diff --git a/src/gallium/targets/xvmc-nouveau/Makefile b/src/gallium/targets/xvmc-nouveau/Makefile
index fe418b07681..4384eeaeadf 100644
--- a/src/gallium/targets/xvmc-nouveau/Makefile
+++ b/src/gallium/targets/xvmc-nouveau/Makefile
@@ -1,7 +1,7 @@
 TOP = ../../../..
 include $(TOP)/configs/current
 
-#LIBNAME =
+LIBBASENAME = XvMCnouveau
 
 PIPE_DRIVERS = \
 	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
diff --git a/src/gallium/targets/xvmc-r600/Makefile b/src/gallium/targets/xvmc-r600/Makefile
index 25aeb65059f..62e47b53851 100644
--- a/src/gallium/targets/xvmc-r600/Makefile
+++ b/src/gallium/targets/xvmc-r600/Makefile
@@ -1,7 +1,7 @@
 TOP = ../../../..
 include $(TOP)/configs/current
 
-#LIBNAME =
+LIBBASENAME = XvMCr600
 
 PIPE_DRIVERS = \
         $(TOP)/src/gallium/drivers/r600/libr600.a \
diff --git a/src/gallium/targets/xvmc-softpipe/Makefile b/src/gallium/targets/xvmc-softpipe/Makefile
index 1e3ff8ac89c..5b60bede589 100644
--- a/src/gallium/targets/xvmc-softpipe/Makefile
+++ b/src/gallium/targets/xvmc-softpipe/Makefile
@@ -1,6 +1,8 @@
 TOP = ../../../..
 include $(TOP)/configs/current
 
+LIBBASENAME = XvMCsoftpipe
+
 DRIVER_DEFINES = -DGALLIUM_SOFTPIPE
 DRIVER_INCLUDES =
 
-- 
cgit v1.2.3


From 990cb6296351a41a2e728f181c0dc096eaddaeb7 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Wed, 27 Oct 2010 11:00:11 +0200
Subject: vl: committed Orasanu Lucian's patch containing va stubs.

---
 configure.ac                                   |   3 +
 src/gallium/state_trackers/va/Makefile         |   6 +-
 src/gallium/state_trackers/va/ftab.c           |  96 ++++++++++-----------
 src/gallium/state_trackers/va/va_context.c     |  42 ++++++++--
 src/gallium/state_trackers/va/va_image.c       | 104 ++++++++++++++++++-----
 src/gallium/state_trackers/va/va_private.h     | 112 ++++++++++++++++++++++---
 src/gallium/state_trackers/va/va_subpicture.c  | 103 +++++++++++++++++++++--
 src/gallium/state_trackers/xorg/xvmc/context.c |   2 +-
 8 files changed, 372 insertions(+), 96 deletions(-)

diff --git a/configure.ac b/configure.ac
index 5edd935920d..eac293f56aa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1623,6 +1623,9 @@ if test "x$enable_gallium_g3dvl" = xyes; then
 	if test "x$HAVE_ST_VDPAU" = xyes; then
         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS vdpau-softpipe"
 	fi
+	if test "x$HAVE_ST_XVMC" = xyes; then
+        GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS xvmc-softpipe"
+	fi
 	if test "x$HAVE_ST_VA" = xyes; then
 	GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS va-softpipe"
 	fi
diff --git a/src/gallium/state_trackers/va/Makefile b/src/gallium/state_trackers/va/Makefile
index 1d6e303a4f1..1e22bb50d1d 100644
--- a/src/gallium/state_trackers/va/Makefile
+++ b/src/gallium/state_trackers/va/Makefile
@@ -15,7 +15,11 @@ C_SOURCES = htab.c \
 	    ftab.c \
 	    va_context.c \
 	    va_image.c \
-	    va_subpicture.c
+	    va_subpicture.c \
+	    va_buffer.c \
+	    va_config.c \
+            va_picture.c \
+            va_surface.c
 	    
 
diff --git a/src/gallium/state_trackers/va/ftab.c b/src/gallium/state_trackers/va/ftab.c
index 651b7660964..010c04a7d28 100644
--- a/src/gallium/state_trackers/va/ftab.c
+++ b/src/gallium/state_trackers/va/ftab.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2010 Thomas Balling Sørensen.
+ * Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -32,28 +32,28 @@
 
 static struct VADriverVTable vtable =
 {
-   0x1, /* VAStatus (*vaTerminate) ( VADriverContextP ctx ); */
-   0x2, /* VAStatus (*vaQueryConfigProfiles) ( VADriverContextP ctx, VAProfile *profile_list,int *num_profiles); */
-   0x3, /* VAStatus (*vaQueryConfigEntrypoints) ( VADriverContextP ctx,	VAProfile profile, VAEntrypoint  *entrypoint_list, int *num_entrypoints	); */
-   0x4, /* VAStatus (*vaGetConfigAttributes) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs ); */
-   0x5, /* VAStatus (*vaCreateConfig) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint,	VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id); */
-   0x6, /* VAStatus (*vaDestroyConfig) ( VADriverContextP ctx, VAConfigID config_id); */
-   0x7, /* VAStatus (*vaQueryConfigAttributes) ( VADriverContextP ctx, VAConfigID config_id, VAProfile *profile, VAEntrypoint *entrypoint, VAConfigAttrib *attrib_list, int *num_attribs); */
-   0x8, /* VAStatus (*vaCreateSurfaces) ( VADriverContextP ctx,int width,int height,int format,int num_surfaces,VASurfaceID *surfaces); */
-   0x9, /* VAStatus (*vaDestroySurfaces) ( VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces ); */
-   0x10, /* VAStatus (*vaCreateContext) (VADriverContextP ctx,VAConfigID config_id,int picture_width,int picture_height,int flag,VASurfaceID *render_targets,int num_render_targets,VAContextID *context); */
-   0x11, /* VAStatus (*vaDestroyContext) (VADriverContextP ctx,VAContextID context); */
-   0x12, /* VAStatus (*vaCreateBuffer) (VADriverContextP ctx,VAContextID context,VABufferType type,unsigned int size,unsigned int num_elements,void *data,VABufferID *buf_id); */
-   0x13, /* VAStatus (*vaBufferSetNumElements) (VADriverContextP ctx,VABufferID buf_id,unsigned int num_elements); */
-   0x14, /* VAStatus (*vaMapBuffer) (VADriverContextP ctx,VABufferID buf_id,void **pbuf); */
-   0x15, /* VAStatus (*vaUnmapBuffer) (VADriverContextP ctx,VABufferID buf_id); */
-   0x16, /* VAStatus (*vaDestroyBuffer) (VADriverContextP ctx,VABufferID buffer_id); */
-   0x17, /* VAStatus (*vaBeginPicture) (VADriverContextP ctx,VAContextID context,VASurfaceID render_target); */
-   0x18, /* VAStatus (*vaRenderPicture) (VADriverContextP ctx,VAContextID context,VABufferID *buffers,int num_buffers); */
-   0x19, /* VAStatus (*vaEndPicture) (VADriverContextP ctx,VAContextID context); */
-   0x20, /* VAStatus (*vaSyncSurface) (VADriverContextP ctx,VASurfaceID render_target); */
-   0x21, /* VAStatus (*vaQuerySurfaceStatus) (VADriverContextP ctx,VASurfaceID render_target,VASurfaceStatus *status); */
-   0x22, /* VAStatus (*vaPutSurface) (
+	&vlVaTerminate, /* VAStatus (*vaTerminate) ( VADriverContextP ctx ); */
+	&vlVaQueryConfigProfiles, /* VAStatus (*vaQueryConfigProfiles) ( VADriverContextP ctx, VAProfile *profile_list,int *num_profiles); */
+	&vlVaQueryConfigEntrypoints, /* VAStatus (*vaQueryConfigEntrypoints) ( VADriverContextP ctx,	VAProfile profile, VAEntrypoint  *entrypoint_list, int *num_entrypoints	); */
+	&vlVaGetConfigAttributes, /* VAStatus (*vaGetConfigAttributes) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs ); */
+	&vlVaCreateConfig, /* VAStatus (*vaCreateConfig) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint,	VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id); */
+	&vlVaDestroyConfig, /* VAStatus (*vaDestroyConfig) ( VADriverContextP ctx, VAConfigID config_id); */
+	&vlVaQueryConfigAttributes, /* VAStatus (*vaQueryConfigAttributes) ( VADriverContextP ctx, VAConfigID config_id, VAProfile *profile, VAEntrypoint *entrypoint, VAConfigAttrib *attrib_list, int *num_attribs); */
+	&vlVaCreateSurfaces, /* VAStatus (*vaCreateSurfaces) ( VADriverContextP ctx,int width,int height,int format,int num_surfaces,VASurfaceID *surfaces); */
+	&vlVaDestroySurfaces, /* VAStatus (*vaDestroySurfaces) ( VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces ); */
+	&vlVaCreateContext, /* VAStatus (*vaCreateContext) (VADriverContextP ctx,VAConfigID config_id,int picture_width,int picture_height,int flag,VASurfaceID *render_targets,int num_render_targets,VAContextID *context); */
+	&vlVaDestroyContext, /* VAStatus (*vaDestroyContext) (VADriverContextP ctx,VAContextID context); */
+	&vlVaCreateBuffer, /* VAStatus (*vaCreateBuffer) (VADriverContextP ctx,VAContextID context,VABufferType type,unsigned int size,unsigned int num_elements,void *data,VABufferID *buf_id); */
+	&vlVaBufferSetNumElements, /* VAStatus (*vaBufferSetNumElements) (VADriverContextP ctx,VABufferID buf_id,unsigned int num_elements); */
+	&vlVaMapBuffer, /* VAStatus (*vaMapBuffer) (VADriverContextP ctx,VABufferID buf_id,void **pbuf); */
+	&vlVaUnmapBuffer, /* VAStatus (*vaUnmapBuffer) (VADriverContextP ctx,VABufferID buf_id); */
+	&vlVaDestroyBuffers, /* VAStatus (*vaDestroyBuffer) (VADriverContextP ctx,VABufferID buffer_id); */
+	&vlVaBeginPicture, /* VAStatus (*vaBeginPicture) (VADriverContextP ctx,VAContextID context,VASurfaceID render_target); */
+	&vlVaRenderPicture, /* VAStatus (*vaRenderPicture) (VADriverContextP ctx,VAContextID context,VABufferID *buffers,int num_buffers); */
+	&vlVaEndPicture, /* VAStatus (*vaEndPicture) (VADriverContextP ctx,VAContextID context); */
+	&vlVaSyncSurface, /* VAStatus (*vaSyncSurface) (VADriverContextP ctx,VASurfaceID render_target); */
+	&vlVaQuerySurfaceStatus, /* VAStatus (*vaQuerySurfaceStatus) (VADriverContextP ctx,VASurfaceID render_target,VASurfaceStatus *status); */
+	&vlVaPutSurface, /* VAStatus (*vaPutSurface) (
     		VADriverContextP ctx,
 		VASurfaceID surface,
 		void* draw,
@@ -65,16 +65,16 @@ static struct VADriverVTable vtable =
 		short desty,
 		unsigned short destw,
 		unsigned short desth,
-		VARectangle *cliprects, 
-		unsigned int number_cliprects, 
+		VARectangle *cliprects,
+		unsigned int number_cliprects,
 		unsigned int flags); */
-   &vlVaQueryImageFormats, /* VAStatus (*vaQueryImageFormats) ( VADriverContextP ctx, VAImageFormat *format_list,int *num_formats); */
-   &vlVaCreateImage, /* VAStatus (*vaCreateImage) (VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image); */
-   0x25, /* VAStatus (*vaDeriveImage) (VADriverContextP ctx,VASurfaceID surface,VAImage *image); */
-   0x26, /* VAStatus (*vaDestroyImage) (VADriverContextP ctx,VAImageID image); */
-   0x27, /* VAStatus (*vaSetImagePalette) (VADriverContextP ctx,VAImageID image, unsigned char *palette); */
-   0x28, /* VAStatus (*vaGetImage) (VADriverContextP ctx,VASurfaceID surface,int x,int y,unsigned int width,unsigned int height,VAImageID image); */
-   0x29, /* VAStatus (*vaPutImage) (
+	&vlVaQueryImageFormats, /* VAStatus (*vaQueryImageFormats) ( VADriverContextP ctx, VAImageFormat *format_list,int *num_formats); */
+	&vlVaCreateImage, /* VAStatus (*vaCreateImage) (VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image); */
+	&vlVaDeriveImage, /* VAStatus (*vaDeriveImage) (VADriverContextP ctx,VASurfaceID surface,VAImage *image); */
+	&vlVaDestroyImage, /* VAStatus (*vaDestroyImage) (VADriverContextP ctx,VAImageID image); */
+	&vlVaSetImagePalette, /* VAStatus (*vaSetImagePalette) (VADriverContextP ctx,VAImageID image, unsigned char *palette); */
+	&vlVaGetImage, /* VAStatus (*vaGetImage) (VADriverContextP ctx,VASurfaceID surface,int x,int y,unsigned int width,unsigned int height,VAImageID image); */
+	&vlVaPutImage, /* VAStatus (*vaPutImage) (
 		VADriverContextP ctx,
 		VASurfaceID surface,
 		VAImageID image,
@@ -87,13 +87,13 @@ static struct VADriverVTable vtable =
 		unsigned int dest_width,
 		unsigned int dest_height
 	); */
-   &vlVaQuerySubpictureFormats,	/* VAStatus (*vaQuerySubpictureFormats) (VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats); */
-   0x31, /* VAStatus (*vaCreateSubpicture) (VADriverContextP ctx,VAImageID image,VASubpictureID *subpicture); */
-   0x32, /* VAStatus (*vaDestroySubpicture) (VADriverContextP ctx,VASubpictureID subpicture); */
-   0x33, /* VAStatus (*vaSetSubpictureImage) (VADriverContextP ctx,VASubpictureID subpicture,VAImageID image); */
-   0x34, /* VAStatus (*vaSetSubpictureChromakey) (VADriverContextP ctx,VASubpictureID subpicture,unsigned int chromakey_min,unsigned int chromakey_max,unsigned int chromakey_mask); */
-   0x35, /* VAStatus (*vaSetSubpictureGlobalAlpha) (VADriverContextP ctx,VASubpictureID subpicture,float global_alpha); */
-   0x36, /* VAStatus (*vaAssociateSubpicture) (
+	&vlVaQuerySubpictureFormats,	/* VAStatus (*vaQuerySubpictureFormats) (VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats); */
+	&vlVaCreateSubpicture, /* VAStatus (*vaCreateSubpicture) (VADriverContextP ctx,VAImageID image,VASubpictureID *subpicture); */
+	&vlVaDestroySubpicture, /* VAStatus (*vaDestroySubpicture) (VADriverContextP ctx,VASubpictureID subpicture); */
+	&vlVaSubpictureImage, /* VAStatus (*vaSetSubpictureImage) (VADriverContextP ctx,VASubpictureID subpicture,VAImageID image); */
+	&vlVaSetSubpictureChromakey, /* VAStatus (*vaSetSubpictureChromakey) (VADriverContextP ctx,VASubpictureID subpicture,unsigned int chromakey_min,unsigned int chromakey_max,unsigned int chromakey_mask); */
+	&vlVaSetSubpictureGlobalAlpha, /* VAStatus (*vaSetSubpictureGlobalAlpha) (VADriverContextP ctx,VASubpictureID subpicture,float global_alpha); */
+	&vlVaAssociateSubpicture, /* VAStatus (*vaAssociateSubpicture) (
 		VADriverContextP ctx,
 		VASubpictureID subpicture,
 		VASurfaceID *target_surfaces,
@@ -107,12 +107,12 @@ static struct VADriverVTable vtable =
 		unsigned short dest_width,
 		unsigned short dest_height,
 		unsigned int flags); */
-   0x37, /* VAStatus (*vaDeassociateSubpicture) (VADriverContextP ctx,VASubpictureID subpicture,VASurfaceID *target_surfaces,int num_surfaces); */
-   0x38, /* VAStatus (*vaQueryDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int *num_attributes); */
-   0x39, /* VAStatus (*vaGetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
-   0x40, /* VAStatus (*vaSetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
-   0x41, /* VAStatus (*vaBufferInfo) (VADriverContextP ctx,VAContextID context,VABufferID buf_id,VABufferType *type,unsigned int *size,unsigned int *num_elements); */
-   0x42, /* VAStatus (*vaLockSurface) (
+	&vlVaDeassociateSubpicture, /* VAStatus (*vaDeassociateSubpicture) (VADriverContextP ctx,VASubpictureID subpicture,VASurfaceID *target_surfaces,int num_surfaces); */
+	&vlVaQueryDisplayAttributes, /* VAStatus (*vaQueryDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int *num_attributes); */
+	&vlVaGetDisplayAttributes, /* VAStatus (*vaGetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
+	&vlVaSetDisplayAttributes, /* VAStatus (*vaSetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
+	&vlVaBufferInfo, /* VAStatus (*vaBufferInfo) (VADriverContextP ctx,VAContextID context,VABufferID buf_id,VABufferType *type,unsigned int *size,unsigned int *num_elements); */
+	&vlVaLockSurface, /* VAStatus (*vaLockSurface) (
 		VADriverContextP ctx,
                 VASurfaceID surface,
                 unsigned int *fourcc,
@@ -124,11 +124,11 @@ static struct VADriverVTable vtable =
                 unsigned int *chroma_v_offset,
                 unsigned int *buffer_name,
                 void **buffer); */
-   0x43, /* VAStatus (*vaUnlockSurface) (VADriverContextP ctx,VASurfaceID surface); */
-   0x44 /* struct VADriverVTableGLX *glx; "Optional" */
+	&vlVaUnlockSurface, /* VAStatus (*vaUnlockSurface) (VADriverContextP ctx,VASurfaceID surface); */
+	0x44 /* struct VADriverVTableGLX *glx; "Optional" */
 };
 
 struct VADriverVTable vlVaGetVtable()
 {
 	return vtable;
-}
\ No newline at end of file
+}
diff --git a/src/gallium/state_trackers/va/va_context.c b/src/gallium/state_trackers/va/va_context.c
index 0b8d7865f73..7ef84606305 100644
--- a/src/gallium/state_trackers/va/va_context.c
+++ b/src/gallium/state_trackers/va/va_context.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2010 Thomas Balling Sørensen.
+ * Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,7 +24,7 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  **************************************************************************/
- 
+
 #include <pipe/p_compiler.h>
 #include <pipe/p_video_context.h>
 #include <util/u_debug.h>
@@ -39,7 +39,7 @@ VAStatus __vaDriverInit_0_31 (VADriverContextP ctx)
 {
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
-	
+
 	ctx->str_vendor = "mesa gallium vaapi";
 	ctx->vtable = vlVaGetVtable();
 	ctx->max_attributes = 1;
@@ -50,8 +50,40 @@ VAStatus __vaDriverInit_0_31 (VADriverContextP ctx)
 	ctx->max_subpic_formats = 1;
 	ctx->version_major = 3;
 	ctx->version_minor = 1;
-	
+
 	VA_INFO("vl_screen_pointer %p\n",ctx->native_dpy);
 
 	return VA_STATUS_SUCCESS;
-}
\ No newline at end of file
+}
+
+VAStatus vlVaCreateContext(VADriverContextP ctx,
+                           VAConfigID config_id,
+                           int picture_width,
+                           int picture_height,
+                           int flag,
+                           VASurfaceID *render_targets,
+                           int num_render_targets,
+                           VAContextID *context)
+{
+	/* Stub: only ctx is checked; *context is never written. */
+	if (!ctx)
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+	return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaDestroyContext(VADriverContextP ctx,
+                            VAContextID context)
+{
+	/* Stub for the vaDestroyContext vtable slot: a NULL ctx is rejected,
+	 * any other call reports that the operation is unimplemented. */
+	return ctx ? VA_STATUS_ERROR_UNIMPLEMENTED
+	           : VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaTerminate(VADriverContextP ctx)
+{
+	/* Stub: a NULL ctx is rejected; otherwise "unimplemented" is reported. */
+	if (ctx)
+		return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_INVALID_CONTEXT;
+}
diff --git a/src/gallium/state_trackers/va/va_image.c b/src/gallium/state_trackers/va/va_image.c
index b7e1320a4e8..b1f990a15eb 100644
--- a/src/gallium/state_trackers/va/va_image.c
+++ b/src/gallium/state_trackers/va/va_image.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2010 Thomas Balling Sørensen.
+ * Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,33 +24,99 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  **************************************************************************/
- 
- #include <util/u_memory.h>
- #include <util/u_format.h>
- #include <va/va.h>
- #include <va/va_backend.h>
- #include "va_private.h"
- 
- VAStatus
- vlVaQueryImageFormats ( 	VADriverContextP ctx, 
-							VAImageFormat *format_list,
-							int *num_formats)
+
+#include <util/u_memory.h>
+#include <util/u_format.h>
+#include <va/va.h>
+#include <va/va_backend.h>
+#include "va_private.h"
+
+VAStatus
+vlVaQueryImageFormats ( 	VADriverContextP ctx,
+                            VAImageFormat *format_list,
+                            int *num_formats)
 {
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
-	
+
 
 	return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
 VAStatus vlVaCreateImage(	VADriverContextP ctx,
-							VAImageFormat *format,
-							int width,
-							int height,
-							VAImage *image)
+                            VAImageFormat *format,
+                            int width,
+                            int height,
+                            VAImage *image)
+{
+	if (!ctx)
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaDeriveImage(	VADriverContextP ctx,
+                            VASurfaceID surface,
+                            VAImage *image)
 {
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
-	
+
+
 	return VA_STATUS_ERROR_UNIMPLEMENTED;
-}
\ No newline at end of file
+}
+
+VAStatus vlVaDestroyImage(VADriverContextP ctx,
+                          VAImageID image)
+{
+	if (!ctx)
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	/* Stub: image destruction is not implemented yet. */
+	return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaSetImagePalette(VADriverContextP ctx,
+                             VAImageID image,
+                             unsigned char *palette)
+{
+	if (!ctx)
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	/* Stub: setting an image palette is not implemented yet. */
+	return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaGetImage(VADriverContextP ctx,
+                      VASurfaceID surface,
+                      int x,
+                      int y,
+                      unsigned int width,
+                      unsigned int height,
+                      VAImageID image)
+{
+	/* Stub for the vaGetImage vtable slot.  No pixel data is read back:
+	 * a NULL ctx yields VA_STATUS_ERROR_INVALID_CONTEXT and every other
+	 * call yields VA_STATUS_ERROR_UNIMPLEMENTED. */
+	return ctx ? VA_STATUS_ERROR_UNIMPLEMENTED
+	           : VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaPutImage(VADriverContextP ctx,
+                      VASurfaceID surface,
+                      VAImageID image,
+                      int src_x,
+                      int src_y,
+                      unsigned int src_width,
+                      unsigned int src_height,
+                      int dest_x,
+                      int dest_y,
+                      unsigned int dest_width,
+                      unsigned int dest_height)
+{
+	/* Stub for the vaPutImage vtable slot: nothing is copied.  The source
+	 * and destination rectangles are accepted but never inspected. */
+	if (ctx)
+		return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_INVALID_CONTEXT;
+}
diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h
index ccaa5c053ea..3c9922e64fc 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2010 Thomas Balling Sørensen.
+ * Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,24 +24,110 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  **************************************************************************/
- 
- #ifndef VA_PRIVATE_H
- #define VA_PRIVATE_H
- 
- #include <va/va.h>
- #include <va/va_backend.h>
- #define VA_DEBUG(_str,...) debug_printf("[Gallium VA backend]: " _str,__VA_ARGS__)
- #define VA_INFO(_str,...) VA_DEBUG("INFO: " _str,__VA_ARGS__)
- #define VA_WARNING(_str,...) VA_DEBUG("WARNING: " _str,__VA_ARGS__)
- #define VA_ERROR(_str,...) VA_DEBUG("ERROR: " _str,__VA_ARGS__)
+
+#ifndef VA_PRIVATE_H
+#define VA_PRIVATE_H
+
+#include <va/va.h>
+#include <va/va_backend.h>
+#define VA_DEBUG(_str,...) debug_printf("[Gallium VA backend]: " _str,__VA_ARGS__)
+#define VA_INFO(_str,...) VA_DEBUG("INFO: " _str,__VA_ARGS__)
+#define VA_WARNING(_str,...) VA_DEBUG("WARNING: " _str,__VA_ARGS__)
+#define VA_ERROR(_str,...) VA_DEBUG("ERROR: " _str,__VA_ARGS__)
 
 // Public functions:
 VAStatus __vaDriverInit_0_31 (VADriverContextP ctx);
 
 // Private functions:
 struct VADriverVTable vlVaGetVtable();
+VAStatus vlVaTerminate (VADriverContextP ctx);
+VAStatus vlVaQueryConfigProfiles (VADriverContextP ctx, VAProfile *profile_list,int *num_profiles);
+VAStatus vlVaQueryConfigEntrypoints (VADriverContextP ctx, VAProfile profile, VAEntrypoint  *entrypoint_list, int *num_entrypoints);
+VAStatus vlVaGetConfigAttributes (VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs);
+VAStatus vlVaCreateConfig (VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id);
+VAStatus vlVaDestroyConfig (VADriverContextP ctx, VAConfigID config_id);
+VAStatus vlVaQueryConfigAttributes (VADriverContextP ctx, VAConfigID config_id, VAProfile *profile, VAEntrypoint *entrypoint, VAConfigAttrib *attrib_list, int *num_attribs);
+VAStatus vlVaCreateSurfaces (VADriverContextP ctx,int width,int height,int format,int num_surfaces,VASurfaceID *surfaces);
+VAStatus vlVaDestroySurfaces (VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces);
+VAStatus vlVaCreateContext (VADriverContextP ctx,VAConfigID config_id,int picture_width,int picture_height,int flag,VASurfaceID *render_targets,int num_render_targets,VAContextID *context);
+VAStatus vlVaDestroyContext (VADriverContextP ctx,VAContextID context);
+VAStatus vlVaCreateBuffer (VADriverContextP ctx,VAContextID context,VABufferType type,unsigned int size,unsigned int num_elements,void *data,VABufferID *buf_id);
+VAStatus vlVaBufferSetNumElements (VADriverContextP ctx,VABufferID buf_id,unsigned int num_elements);
+VAStatus vlVaMapBuffer (VADriverContextP ctx,VABufferID buf_id,void **pbuf);
+VAStatus vlVaUnmapBuffer (VADriverContextP ctx,VABufferID buf_id);
+VAStatus vlVaDestroyBuffers (VADriverContextP ctx,VABufferID buffer_id);
+VAStatus vlVaBeginPicture (VADriverContextP ctx,VAContextID context,VASurfaceID render_target);
+VAStatus vlVaRenderPicture (VADriverContextP ctx,VAContextID context,VABufferID *buffers,int num_buffers);
+VAStatus vlVaEndPicture (VADriverContextP ctx,VAContextID context);
+VAStatus vlVaSyncSurface (VADriverContextP ctx,VASurfaceID render_target);
+VAStatus vlVaQuerySurfaceStatus (VADriverContextP ctx,VASurfaceID render_target,VASurfaceStatus *status);
+VAStatus vlVaPutSurface (VADriverContextP ctx,
+                         VASurfaceID surface,
+                         void* draw,
+                         short srcx,
+                         short srcy,
+                         unsigned short srcw,
+                         unsigned short srch,
+                         short destx,
+                         short desty,
+                         unsigned short destw,
+                         unsigned short desth,
+                         VARectangle *cliprects,
+                         unsigned int number_cliprects,
+                         unsigned int flags);
 VAStatus vlVaQueryImageFormats (VADriverContextP ctx,VAImageFormat *format_list,int *num_formats);
 VAStatus vlVaQuerySubpictureFormats(VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats);
 VAStatus vlVaCreateImage(VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image);
- 
- #endif // VA_PRIVATE_H
+VAStatus vlVaDeriveImage(VADriverContextP ctx,VASurfaceID surface,VAImage *image);
+VAStatus vlVaDestroyImage(VADriverContextP ctx,VAImageID image);
+VAStatus vlVaSetImagePalette(VADriverContextP ctx,VAImageID image, unsigned char *palette);
+VAStatus vlVaGetImage(VADriverContextP ctx,VASurfaceID surface,int x,int y,unsigned int width,unsigned int height,VAImageID image);
+VAStatus vlVaPutImage(VADriverContextP ctx,
+                      VASurfaceID surface,
+                      VAImageID image,
+                      int src_x,
+                      int src_y,
+                      unsigned int src_width,
+                      unsigned int src_height,
+                      int dest_x,
+                      int dest_y,
+                      unsigned int dest_width,
+                      unsigned int dest_height);
+VAStatus vlVaQuerySubpictureFormats(VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats);
+VAStatus vlVaCreateSubpicture(VADriverContextP ctx,VAImageID image,VASubpictureID *subpicture);
+VAStatus vlVaDestroySubpicture(VADriverContextP ctx,VASubpictureID subpicture);
+VAStatus vlVaSubpictureImage(VADriverContextP ctx,VASubpictureID subpicture,VAImageID image);
+VAStatus vlVaSetSubpictureChromakey(VADriverContextP ctx,VASubpictureID subpicture,unsigned int chromakey_min,unsigned int chromakey_max,unsigned int chromakey_mask);
+VAStatus vlVaSetSubpictureGlobalAlpha(VADriverContextP ctx,VASubpictureID subpicture,float global_alpha);
+VAStatus vlVaAssociateSubpicture(VADriverContextP ctx,
+                                 VASubpictureID subpicture,
+                                 VASurfaceID *target_surfaces,
+                                 int num_surfaces,
+                                 short src_x,
+                                 short src_y,
+                                 unsigned short src_width,
+                                 unsigned short src_height,
+                                 short dest_x,
+                                 short dest_y,
+                                 unsigned short dest_width,
+                                 unsigned short dest_height,
+                                 unsigned int flags);
+VAStatus vlVaDeassociateSubpicture(VADriverContextP ctx,VASubpictureID subpicture,VASurfaceID *target_surfaces,int num_surfaces);
+VAStatus vlVaQueryDisplayAttributes(VADriverContextP ctx,VADisplayAttribute *attr_list,int *num_attributes);
+VAStatus vlVaGetDisplayAttributes(VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes);
+VAStatus vlVaSetDisplayAttributes(VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes);
+VAStatus vlVaBufferInfo(VADriverContextP ctx,VAContextID context,VABufferID buf_id,VABufferType *type,unsigned int *size,unsigned int *num_elements);
+VAStatus vlVaLockSurface(VADriverContextP ctx,
+                         VASurfaceID surface,
+                         unsigned int *fourcc,
+                         unsigned int *luma_stride,
+                         unsigned int *chroma_u_stride,
+                         unsigned int *chroma_v_stride,
+                         unsigned int *luma_offset,
+                         unsigned int *chroma_u_offset,
+                         unsigned int *chroma_v_offset,
+                         unsigned int *buffer_name,
+                         void **buffer);
+VAStatus vlVaUnlockSurface(VADriverContextP ctx,VASurfaceID surface);
+
+#endif //VA_PRIVATE_H
diff --git a/src/gallium/state_trackers/va/va_subpicture.c b/src/gallium/state_trackers/va/va_subpicture.c
index 211970913fa..9317d313c65 100644
--- a/src/gallium/state_trackers/va/va_subpicture.c
+++ b/src/gallium/state_trackers/va/va_subpicture.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2010 Thomas Balling Sørensen.
+ * Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,17 +24,102 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  **************************************************************************/
- 
+
 #include <va/va.h>
 #include <va/va_backend.h>
 #include "va_private.h"
- 
-VAStatus 
-vlVaQuerySubpictureFormats(	VADriverContextP ctx,
-							VAImageFormat *format_list,
-							unsigned int *flags,
-							unsigned int *num_formats)
+
+VAStatus
+vlVaQuerySubpictureFormats(VADriverContextP ctx,
+                           VAImageFormat *format_list,
+                           unsigned int *flags,
+                           unsigned int *num_formats)
+{
+	/* Stub: nothing is written to the output arguments.  A NULL ctx is
+	 * rejected; otherwise the query is reported as unimplemented. */
+	return ctx ? VA_STATUS_ERROR_UNIMPLEMENTED
+	           : VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+
+VAStatus vlVaCreateSubpicture(VADriverContextP ctx,
+                              VAImageID image,
+                              VASubpictureID *subpicture)
+{
+	/* Stub for the vaCreateSubpicture vtable slot; *subpicture is left
+	 * untouched. */
+	if (ctx)
+		return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaDestroySubpicture(VADriverContextP ctx,
+                               VASubpictureID subpicture)
+{
+	/* Stub for the vaDestroySubpicture vtable slot: validates ctx and
+	 * otherwise reports the operation as unimplemented. */
+	return ctx ? VA_STATUS_ERROR_UNIMPLEMENTED
+	           : VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaSubpictureImage(VADriverContextP ctx,
+                             VASubpictureID subpicture,
+                             VAImageID image)
+{
+	/* Stub occupying the vaSetSubpictureImage vtable slot; the image is
+	 * not bound to the subpicture. */
+	if (ctx)
+		return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaSetSubpictureChromakey(VADriverContextP ctx,
+                                    VASubpictureID subpicture,
+                                    unsigned int chromakey_min,
+                                    unsigned int chromakey_max,
+                                    unsigned int chromakey_mask)
+{
+	/* Stub for the vaSetSubpictureChromakey vtable slot: the chroma-key
+	 * arguments are ignored and only ctx is validated. */
+	return ctx ? VA_STATUS_ERROR_UNIMPLEMENTED
+	           : VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaSetSubpictureGlobalAlpha(VADriverContextP ctx,
+                                      VASubpictureID subpicture,
+                                      float global_alpha)
+{
+	/* Stub for the vaSetSubpictureGlobalAlpha vtable slot; global_alpha
+	 * is accepted but not stored anywhere. */
+	if (ctx)
+		return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaAssociateSubpicture(VADriverContextP ctx,
+                                 VASubpictureID subpicture,
+                                 VASurfaceID *target_surfaces,
+                                 int num_surfaces,
+                                 short src_x,
+                                 short src_y,
+                                 unsigned short src_width,
+                                 unsigned short src_height,
+                                 short dest_x,
+                                 short dest_y,
+                                 unsigned short dest_width,
+                                 unsigned short dest_height,
+                                 unsigned int flags)
+{
+	/* Stub for the vaAssociateSubpicture vtable slot: no association is
+	 * recorded, and the surface list and rectangles are never read. */
+	return ctx ? VA_STATUS_ERROR_UNIMPLEMENTED
+	           : VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaDeassociateSubpicture(	VADriverContextP ctx,
+                                    VASubpictureID subpicture,
+                                    VASurfaceID *target_surfaces,
+                                    int num_surfaces)
 {
+	if (!ctx)
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
 	return VA_STATUS_ERROR_UNIMPLEMENTED;
-}
\ No newline at end of file
+}
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index 5e4af9e555a..688d68b6ea7 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -210,7 +210,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       return BadImplementation;
    }
    if (mc_type != (XVMC_MOCOMP | XVMC_MPEG_2)) {
-      XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Non-MPEG2/Mocomp acceleration unsupported.\n");
+      XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Non-MPEG2 acceleration unsupported.\n");
       return BadImplementation;
    }
    if (!(surface_flags & XVMC_INTRA_UNSIGNED)) {
-- 
cgit v1.2.3


From 664f10625a74a7e0ed1bfd44b2fb6f44bd8828a2 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Wed, 27 Oct 2010 13:01:18 +0200
Subject: vl: rest of Luc's patch

---
 src/gallium/state_trackers/va/va_buffer.c  |  96 +++++++++++++++++++++++
 src/gallium/state_trackers/va/va_config.c  | 100 ++++++++++++++++++++++++
 src/gallium/state_trackers/va/va_display.c |  66 ++++++++++++++++
 src/gallium/state_trackers/va/va_picture.c |  60 +++++++++++++++
 src/gallium/state_trackers/va/va_surface.c | 120 +++++++++++++++++++++++++++++
 5 files changed, 442 insertions(+)
 create mode 100644 src/gallium/state_trackers/va/va_buffer.c
 create mode 100644 src/gallium/state_trackers/va/va_config.c
 create mode 100644 src/gallium/state_trackers/va/va_display.c
 create mode 100644 src/gallium/state_trackers/va/va_picture.c
 create mode 100644 src/gallium/state_trackers/va/va_surface.c

diff --git a/src/gallium/state_trackers/va/va_buffer.c b/src/gallium/state_trackers/va/va_buffer.c
new file mode 100644
index 00000000000..19afd7b2a30
--- /dev/null
+++ b/src/gallium/state_trackers/va/va_buffer.c
@@ -0,0 +1,96 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <va/va.h>
+#include <va/va_backend.h>
+#include "va_private.h"
+
+
+VAStatus vlVaCreateBuffer(VADriverContextP ctx,
+                          VAContextID context,
+                          VABufferType type,
+                          unsigned int size,
+                          unsigned int num_elements,
+                          void *data,
+                          VABufferID *buf_id)
+{
+	/* Stub for the vaCreateBuffer vtable slot: nothing is allocated and
+	 * *buf_id is left untouched. */
+	if (ctx)
+		return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaBufferSetNumElements(VADriverContextP ctx,
+                                  VABufferID buf_id,
+                                  unsigned int num_elements)
+{
+	/* Stub for the vaBufferSetNumElements vtable slot: validates ctx and
+	 * otherwise reports the operation as unimplemented. */
+	return ctx ? VA_STATUS_ERROR_UNIMPLEMENTED
+	           : VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaMapBuffer(VADriverContextP ctx,
+                       VABufferID buf_id,
+                       void **pbuff)
+{
+	/* Stub for the vaMapBuffer vtable slot: no mapping is produced and
+	 * *pbuff is never written. */
+	if (ctx)
+		return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaUnmapBuffer(VADriverContextP ctx,
+                         VABufferID buf_id)
+{
+	/* Stub for the vaUnmapBuffer vtable slot: a NULL ctx is rejected,
+	 * any other call reports that the operation is unimplemented. */
+	return ctx ? VA_STATUS_ERROR_UNIMPLEMENTED
+	           : VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaDestroyBuffers(VADriverContextP ctx,
+                            VABufferID buffer_id)
+{
+	/* Stub occupying the vaDestroyBuffer vtable slot; nothing is freed. */
+	if (ctx)
+		return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_INVALID_CONTEXT;
+}
+
+VAStatus vlVaBufferInfo(VADriverContextP ctx,
+                        VAContextID context,
+                        VABufferID buf_id,
+                        VABufferType *type,
+                        unsigned int *size,
+                        unsigned int *num_elements)
+{
+	/* Stub for the vaBufferInfo vtable slot: none of the output
+	 * arguments (type, size, num_elements) is written. */
+	return ctx ? VA_STATUS_ERROR_UNIMPLEMENTED
+	           : VA_STATUS_ERROR_INVALID_CONTEXT;
+}
diff --git a/src/gallium/state_trackers/va/va_config.c b/src/gallium/state_trackers/va/va_config.c
new file mode 100644
index 00000000000..1bec28c29a2
--- /dev/null
+++ b/src/gallium/state_trackers/va/va_config.c
@@ -0,0 +1,100 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <va/va.h>
+#include <va/va_backend.h>
+#include "va_private.h"
+
+VAStatus vlVaConfigProfiles(       VADriverContextP ctx,
+                                   VAProfile *profile_list,
+                                   int *num_profiles)
+{
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* stub: profile_list and *num_profiles are never written */
+}
+
+
+VAStatus vlVaConfigEntrypoints(       VADriverContextP ctx,
+                                      VAProfile profile,
+                                      VAEntypoint *entypoint_list,
+                                      int *num_entrypoints)
+{
+	if (!ctx)
+		return VA_STATUS_ERROR_INVALID_CONTEXT; /* only the ctx pointer is validated */
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* stub: the list and *num_entrypoints are never written */
+}
+
+
+VAStatus vlVaGetConfigAttributes(       VADriverContextP ctx,
+                                        VAProfile profile,
+                                        VAEntrypoint entrypoint,
+                                        VAConfigAttrib *attrib_list,
+                                        int num_attribs)
+{ /* Stub entry point: attrib_list is neither read nor filled in. */
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
+}
+
+VAStatus vlVaCreateConfig(       VADriverContextP ctx,
+                                 VAProfile profile,
+                                 VAEntrypoint entrypoint,
+                                 VAConfigAttrib *attrib_list,
+                                 int num_attribs,
+                                 VAConfigID *config_id)
+{ /* Stub entry point: no config is created and *config_id is never written. */
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
+}
+
+VAStatus vlVaDestroyConfig(       VADriverContextP ctx,
+                                  VAConfigID config_id)
+{ /* Stub entry point: config_id is not used. */
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
+}
+
+VAStatus vlVaQueryConfigAttributes(       VADriverContextP ctx,
+        VAConfigID config_id,
+        VAProfile *profile,
+        VAEntrypoint *entrypoint,
+        VAConfigAttrib *attrib_list,
+        int *num_attribs)
+{ /* Stub entry point: none of the output pointers is written. */
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
+}
diff --git a/src/gallium/state_trackers/va/va_display.c b/src/gallium/state_trackers/va/va_display.c
new file mode 100644
index 00000000000..d50d712d4e0
--- /dev/null
+++ b/src/gallium/state_trackers/va/va_display.c
@@ -0,0 +1,66 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ #include <va/va.h>
+ #include <va/va_backend.h>
+ #include "va_private.h"
+
+
+VAStatus  vlVaQueryDisplayAttributes(		VADriverContextP ctx,
+								VADisplayAttribute *attr_list,
+								int *num_attributes)
+{ /* Stub entry point: attr_list and *num_attributes are never written. */
+        if (!ctx) /* only the driver context pointer is validated */
+                return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+
+        return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
+}
+
+VAStatus  vlVaGetDisplayAttributes(		VADriverContextP ctx,
+								VADisplayAttribute *attr_list,
+								int num_attributes)
+{ /* Stub entry point: attr_list is neither read nor filled in. */
+        if (!ctx) /* only the driver context pointer is validated */
+                return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+
+        return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
+}
+
+VAStatus  vlVaSetDisplayAttributes(		VADriverContextP ctx,
+								VADisplayAttribute *attr_list,
+								int num_attributes)
+{ /* Stub entry point: attr_list is not read, so no attribute is applied. */
+        if (!ctx) /* only the driver context pointer is validated */
+                return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+
+        return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
+}
+
+
diff --git a/src/gallium/state_trackers/va/va_picture.c b/src/gallium/state_trackers/va/va_picture.c
new file mode 100644
index 00000000000..cf7d844a780
--- /dev/null
+++ b/src/gallium/state_trackers/va/va_picture.c
@@ -0,0 +1,60 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <va/va.h>
+#include <va/va_backend.h>
+#include "va_private.h"
+
+VAStatus vlVaBeginPicture(       VADriverContextP ctx,
+                                 VAContextID context,
+                                 VASurfaceID render_target)
+{ /* Stub entry point: context and render_target are not used. */
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
+}
+
+VAStatus vlVaRenderPicture(       VADriverContextP ctx,
+                                  VAContextID context,
+                                  VABufferID *buffers,
+                                  int num_buffers)
+{ /* Stub entry point: the buffers array is never read. */
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
+}
+
+VAStatus vlVaEndPicture(       VADriverContextP ctx,
+                               VAContextID context)
+{ /* Stub entry point: context is not used. */
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
+}
diff --git a/src/gallium/state_trackers/va/va_surface.c b/src/gallium/state_trackers/va/va_surface.c
new file mode 100644
index 00000000000..7d234bd51b4
--- /dev/null
+++ b/src/gallium/state_trackers/va/va_surface.c
@@ -0,0 +1,120 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <va/va.h>
+#include <va/va_backend.h>
+#include "va_private.h"
+
+VAStatus vlVaCreateSurfaces(       VADriverContextP ctx,
+                                                        int width,
+                                                        int height,
+                                                        int format,
+                                                        int num_surfaces
+                                                        VASurfaceID *surfaces)
+{
+        if (!ctx)
+                return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+        return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaDestroySurfaces(       VADriverContextP ctx,
+                                                        VASurfaceID *surface_list,
+                                                        int num_surfaces)
+        if (!ctx)
+                return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+        return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaSyncSurface(       VADriverContextP ctx,
+                                                        VASurfaceID render_target)
+        if (!ctx)
+                return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+        return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaQuerySurfaceStatus(       VADriverContextP ctx,
+                                                        VASurfaceID render_target,
+                                                        VASurfaceStatus *status)
+{
+        if (!ctx)
+                return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+        return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaPutSurface(       VADriverContextP ctx,
+                                                        VADriverContextP ctx,
+                                                        VASurfaceID surface,
+                                                        void* draw,
+                                                        short srcx,
+                                                        short srcy,
+                                                        unsigned short srcw,
+                                                        unsigned short srch,
+                                                        short destx,
+                                                        short desty,
+                                                        unsigned short destw,
+                                                        unsigned short desth,
+                                                        VARectangle *cliprects,
+                                                        unsigned int number_cliprects,
+                                                        unsigned int flags)
+{
+        if (!ctx)
+                return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+        return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaLockSurface(	VADriverContextP ctx,
+							VASurfaceID surface,
+							unsigned int *fourcc,
+							unsigned int *luma_stride,
+							unsigned int *chroma_u_stride,
+							unsigned int *chroma_v_stride,
+							unsigned int *luma_offset,
+							unsigned int *chroma_u_offset,
+							unsigned int *chroma_v_offset,
+							unsigned int *buffer_name,
+							void **buffer)
+{
+        if (!ctx)
+                return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+        return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
+VAStatus vlVaUnlockSurface(	VADriverContextP ctx,
+							VASurfaceID surface)
+{
+        if (!ctx)
+                return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+        return VA_STATUS_ERROR_UNIMPLEMENTED;
+}
+
-- 
cgit v1.2.3


From 6b6310e67ce1d2c5729d91c704302282998ed35e Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Wed, 27 Oct 2010 20:27:11 +0200
Subject: vl: more fixes to Luc's patch

---
 src/gallium/state_trackers/va/ftab.c       | 2 +-
 src/gallium/state_trackers/va/va_buffer.c  | 2 +-
 src/gallium/state_trackers/va/va_surface.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/va/ftab.c b/src/gallium/state_trackers/va/ftab.c
index 010c04a7d28..a567eee9dd1 100644
--- a/src/gallium/state_trackers/va/ftab.c
+++ b/src/gallium/state_trackers/va/ftab.c
@@ -47,7 +47,7 @@ static struct VADriverVTable vtable =
 	&vlVaBufferSetNumElements, /* VAStatus (*vaBufferSetNumElements) (VADriverContextP ctx,VABufferID buf_id,unsigned int num_elements); */
 	&vlVaMapBuffer, /* VAStatus (*vaMapBuffer) (VADriverContextP ctx,VABufferID buf_id,void **pbuf); */
 	&vlVaUnmapBuffer, /* VAStatus (*vaUnmapBuffer) (VADriverContextP ctx,VABufferID buf_id); */
-	&vlVaDestroyBuffers, /* VAStatus (*vaDestroyBuffer) (VADriverContextP ctx,VABufferID buffer_id); */
+	&vlVaDestroyBuffer, /* VAStatus (*vaDestroyBuffer) (VADriverContextP ctx,VABufferID buffer_id); */
 	&vlVaBeginPicture, /* VAStatus (*vaBeginPicture) (VADriverContextP ctx,VAContextID context,VASurfaceID render_target); */
 	&vlVaRenderPicture, /* VAStatus (*vaRenderPicture) (VADriverContextP ctx,VAContextID context,VABufferID *buffers,int num_buffers); */
 	&vlVaEndPicture, /* VAStatus (*vaEndPicture) (VADriverContextP ctx,VAContextID context); */
diff --git a/src/gallium/state_trackers/va/va_buffer.c b/src/gallium/state_trackers/va/va_buffer.c
index 19afd7b2a30..7608a4264ff 100644
--- a/src/gallium/state_trackers/va/va_buffer.c
+++ b/src/gallium/state_trackers/va/va_buffer.c
@@ -73,7 +73,7 @@ VAStatus vlVaUnmapBuffer(		VADriverContextP ctx,
 	return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaDestroyBuffers(		VADriverContextP ctx,
+VAStatus vlVaDestroyBuffer(		VADriverContextP ctx,
                                     VABufferID buffer_id)
 {
 	if (!ctx)
diff --git a/src/gallium/state_trackers/va/va_surface.c b/src/gallium/state_trackers/va/va_surface.c
index 7d234bd51b4..314aa7c3d96 100644
--- a/src/gallium/state_trackers/va/va_surface.c
+++ b/src/gallium/state_trackers/va/va_surface.c
@@ -33,7 +33,7 @@ VAStatus vlVaCreateSurfaces(       VADriverContextP ctx,
                                                         int width,
                                                         int height,
                                                         int format,
-                                                        int num_surfaces
+                                                        int num_surfaces,
                                                         VASurfaceID *surfaces)
 {
         if (!ctx)
@@ -53,6 +53,7 @@ VAStatus vlVaDestroySurfaces(       VADriverContextP ctx,
 
 VAStatus vlVaSyncSurface(       VADriverContextP ctx,
                                                         VASurfaceID render_target)
+{
         if (!ctx)
                 return VA_STATUS_ERROR_INVALID_CONTEXT;
 
@@ -70,7 +71,6 @@ VAStatus vlVaQuerySurfaceStatus(       VADriverContextP ctx,
 }
 
 VAStatus vlVaPutSurface(       VADriverContextP ctx,
-                                                        VADriverContextP ctx,
                                                         VASurfaceID surface,
                                                         void* draw,
                                                         short srcx,
-- 
cgit v1.2.3


From fd2cbe94dfaa98b79c16fb81d7bac84c5c683249 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Thu, 28 Oct 2010 09:40:25 +0200
Subject: vl: small typos and stuff

---
 src/gallium/state_trackers/va/va_config.c  |   2 +-
 src/gallium/state_trackers/va/va_private.h |   4 +-
 src/gallium/state_trackers/va/va_surface.c | 112 ++++++++++++++---------------
 3 files changed, 60 insertions(+), 58 deletions(-)

diff --git a/src/gallium/state_trackers/va/va_config.c b/src/gallium/state_trackers/va/va_config.c
index 1bec28c29a2..5756c2f155d 100644
--- a/src/gallium/state_trackers/va/va_config.c
+++ b/src/gallium/state_trackers/va/va_config.c
@@ -42,7 +42,7 @@ VAStatus vlVaConfigProfiles(       VADriverContextP ctx,
 
 VAStatus vlVaConfigEntrypoints(       VADriverContextP ctx,
                                       VAProfile profile,
-                                      VAEntypoint *entypoint_list,
+                                      VAEntrypoint *entypoint_list,
                                       int *num_entrypoints)
 {
 	if (!ctx)
diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h
index 3c9922e64fc..bd037c89606 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -40,6 +40,8 @@ VAStatus __vaDriverInit_0_31 (VADriverContextP ctx);
 
 // Private functions:
 struct VADriverVTable vlVaGetVtable();
+
+// Vtable functions:
 VAStatus vlVaTerminate (VADriverContextP ctx);
 VAStatus vlVaQueryConfigProfiles (VADriverContextP ctx, VAProfile *profile_list,int *num_profiles);
 VAStatus vlVaQueryConfigEntrypoints (VADriverContextP ctx, VAProfile profile, VAEntrypoint  *entrypoint_list, int *num_entrypoints);
@@ -55,7 +57,7 @@ VAStatus vlVaCreateBuffer (VADriverContextP ctx,VAContextID context,VABufferType
 VAStatus vlVaBufferSetNumElements (VADriverContextP ctx,VABufferID buf_id,unsigned int num_elements);
 VAStatus vlVaMapBuffer (VADriverContextP ctx,VABufferID buf_id,void **pbuf);
 VAStatus vlVaUnmapBuffer (VADriverContextP ctx,VABufferID buf_id);
-VAStatus vlVaDestroyBuffers (VADriverContextP ctx,VABufferID buffer_id);
+VAStatus vlVaDestroyBuffer (VADriverContextP ctx,VABufferID buffer_id);
 VAStatus vlVaBeginPicture (VADriverContextP ctx,VAContextID context,VASurfaceID render_target);
 VAStatus vlVaRenderPicture (VADriverContextP ctx,VAContextID context,VABufferID *buffers,int num_buffers);
 VAStatus vlVaEndPicture (VADriverContextP ctx,VAContextID context);
diff --git a/src/gallium/state_trackers/va/va_surface.c b/src/gallium/state_trackers/va/va_surface.c
index 314aa7c3d96..ad241adaf41 100644
--- a/src/gallium/state_trackers/va/va_surface.c
+++ b/src/gallium/state_trackers/va/va_surface.c
@@ -30,91 +30,91 @@
 #include "va_private.h"
 
 VAStatus vlVaCreateSurfaces(       VADriverContextP ctx,
-                                                        int width,
-                                                        int height,
-                                                        int format,
-                                                        int num_surfaces,
-                                                        VASurfaceID *surfaces)
+                                   int width,
+                                   int height,
+                                   int format,
+                                   int num_surfaces,
+                                   VASurfaceID *surfaces)
 {
-        if (!ctx)
-                return VA_STATUS_ERROR_INVALID_CONTEXT;
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-        return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* stub: no surface is created, surfaces[] is never written */
 }
 
 VAStatus vlVaDestroySurfaces(       VADriverContextP ctx,
-                                                        VASurfaceID *surface_list,
-                                                        int num_surfaces)
-        if (!ctx)
-                return VA_STATUS_ERROR_INVALID_CONTEXT;
+                                    VASurfaceID *surface_list,
+                                    int num_surfaces)
+{ /* Stub entry point: surface_list is never read. */
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-        return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* reached for every non-NULL ctx */
 }
 
 VAStatus vlVaSyncSurface(       VADriverContextP ctx,
-                                                        VASurfaceID render_target)
+                                VASurfaceID render_target)
 {
-        if (!ctx)
-                return VA_STATUS_ERROR_INVALID_CONTEXT;
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-        return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* stub: render_target is not used, nothing is waited on */
 }
 
 VAStatus vlVaQuerySurfaceStatus(       VADriverContextP ctx,
-                                                        VASurfaceID render_target,
-                                                        VASurfaceStatus *status)
+                                       VASurfaceID render_target,
+                                       VASurfaceStatus *status)
 {
-        if (!ctx)
-                return VA_STATUS_ERROR_INVALID_CONTEXT;
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-        return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* stub: *status is never written */
 }
 
 VAStatus vlVaPutSurface(       VADriverContextP ctx,
-                                                        VASurfaceID surface,
-                                                        void* draw,
-                                                        short srcx,
-                                                        short srcy,
-                                                        unsigned short srcw,
-                                                        unsigned short srch,
-                                                        short destx,
-                                                        short desty,
-                                                        unsigned short destw,
-                                                        unsigned short desth,
-                                                        VARectangle *cliprects,
-                                                        unsigned int number_cliprects,
-                                                        unsigned int flags)
+                               VASurfaceID surface,
+                               void* draw,
+                               short srcx,
+                               short srcy,
+                               unsigned short srcw,
+                               unsigned short srch,
+                               short destx,
+                               short desty,
+                               unsigned short destw,
+                               unsigned short desth,
+                               VARectangle *cliprects,
+                               unsigned int number_cliprects,
+                               unsigned int flags)
 {
-        if (!ctx)
-                return VA_STATUS_ERROR_INVALID_CONTEXT;
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-        return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* stub: source/destination rectangles, cliprects and flags are all unused */
 }
 
 VAStatus vlVaLockSurface(	VADriverContextP ctx,
-							VASurfaceID surface,
-							unsigned int *fourcc,
-							unsigned int *luma_stride,
-							unsigned int *chroma_u_stride,
-							unsigned int *chroma_v_stride,
-							unsigned int *luma_offset,
-							unsigned int *chroma_u_offset,
-							unsigned int *chroma_v_offset,
-							unsigned int *buffer_name,
-							void **buffer)
+                            VASurfaceID surface,
+                            unsigned int *fourcc,
+                            unsigned int *luma_stride,
+                            unsigned int *chroma_u_stride,
+                            unsigned int *chroma_v_stride,
+                            unsigned int *luma_offset,
+                            unsigned int *chroma_u_offset,
+                            unsigned int *chroma_v_offset,
+                            unsigned int *buffer_name,
+                            void **buffer)
 {
-        if (!ctx)
-                return VA_STATUS_ERROR_INVALID_CONTEXT;
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-        return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* stub: none of the output pointers is written */
 }
 
 VAStatus vlVaUnlockSurface(	VADriverContextP ctx,
-							VASurfaceID surface)
+                            VASurfaceID surface)
 {
-        if (!ctx)
-                return VA_STATUS_ERROR_INVALID_CONTEXT;
+	if (!ctx) /* only the driver context pointer is validated */
+		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-        return VA_STATUS_ERROR_UNIMPLEMENTED;
+	return VA_STATUS_ERROR_UNIMPLEMENTED; /* stub: surface is not used */
 }
-
-- 
cgit v1.2.3


From 3fac09ad873b8a5239f84d07dc12e8b08a117561 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Thu, 28 Oct 2010 12:51:35 +0200
Subject: vl: Initial implementation of vlVaQuerySubpictureFormats.

---
 src/gallium/state_trackers/va/va_subpicture.c | 36 +++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/va/va_subpicture.c b/src/gallium/state_trackers/va/va_subpicture.c
index 9317d313c65..442b1acea5a 100644
--- a/src/gallium/state_trackers/va/va_subpicture.c
+++ b/src/gallium/state_trackers/va/va_subpicture.c
@@ -27,8 +27,29 @@
 
 #include <va/va.h>
 #include <va/va_backend.h>
+#include <pipe/p_format.h>
 #include "va_private.h"
 
+#define NUM_FORMAT_SUPPORTED 2
+
+typedef struct  {
+	enum pipe_format;
+	VAImageFormat       va_format; /* NOTE(review): 'enum pipe_format;' above carries no member name, so it adds no field to this struct; the table below supplies three initializers per entry — confirm and name the member */
+    unsigned int        va_flags;
+} va_subpicture_formats_supported_t;
+
+static const va_subpicture_formats_supported_t va_subpicture_formats_supported[NUM_FORMAT_SUPPORTED] = 
+{
+	{ PIPE_FORMAT_B8G8R8A8_UNORM,
+      { VA_FOURCC('B','G','R','A'), VA_LSB_FIRST, 32,
+        32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 }, /* presumably depth, then red/green/blue/alpha masks — TODO confirm against VAImageFormat */
+      0 }, /* third initializer, written for the va_flags slot: no flags */
+    { PIPE_FORMAT_R8G8B8A8_UNORM, 
+	  { VA_FOURCC('R','G','B','A'), VA_LSB_FIRST, 32,
+        32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 }, /* same layout as the entry above, with the red and blue masks swapped */
+      0 } /* third initializer, written for the va_flags slot: no flags */
+};
+
 VAStatus
 vlVaQuerySubpictureFormats(		VADriverContextP ctx,
                                 VAImageFormat *format_list,
@@ -37,8 +58,19 @@ vlVaQuerySubpictureFormats(		VADriverContextP ctx,
 {
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
-
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+	/* Copies the static subpicture format table into format_list[]/flags[] and reports the entry count. */
+	if (!(format_list && flags && num_formats)) /* all three output pointers are required */
+		return VA_STATUS_ERROR_UNKNOWN;
+		
+	int n = 0;
+	/* Query supported formats */
+	for (n = 0; n < NUM_FORMAT_SUPPORTED; n++)
+	{
+		flags[n] = va_subpicture_formats_supported[n].va_flags;
+		format_list[n] = va_subpicture_formats_supported[n].va_format;
+	}
+	*num_formats = n; /* the loop leaves n at the number of entries written; previously the count was never stored */
+	return VA_STATUS_SUCCESS;
 }
 
 
-- 
cgit v1.2.3


From a565f58edaad646942f2174e66ef1343f56ae679 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Thu, 28 Oct 2010 13:40:59 +0200
Subject: vl: enable target va-r600

---
 configure.ac                                  |  2 +-
 src/gallium/state_trackers/va/va_subpicture.c |  6 +--
 src/gallium/targets/Makefile.va               | 62 +++++++++++++++++++++++++++
 src/gallium/targets/va-softpipe/Makefile      | 21 +++++++++
 4 files changed, 87 insertions(+), 4 deletions(-)
 create mode 100644 src/gallium/targets/Makefile.va
 create mode 100644 src/gallium/targets/va-softpipe/Makefile

diff --git a/configure.ac b/configure.ac
index eac293f56aa..8544c5f0a91 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1590,7 +1590,7 @@ AC_ARG_ENABLE([gallium-r600],
 if test "x$enable_gallium_r600" = xyes; then
     if test "x$HAVE_LIBDRM_RADEON" = xyes; then
 	GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
-	gallium_check_st "r600/drm" "dri-r600" "xvmc-r600"
+	gallium_check_st "r600/drm" "dri-r600" "xvmc-r600" "va-r600"
     else
 	AC_MSG_ERROR([libdrm_radeon is missing, cannot build gallium-r600])
     fi
diff --git a/src/gallium/state_trackers/va/va_subpicture.c b/src/gallium/state_trackers/va/va_subpicture.c
index 442b1acea5a..a6d2960e7e5 100644
--- a/src/gallium/state_trackers/va/va_subpicture.c
+++ b/src/gallium/state_trackers/va/va_subpicture.c
@@ -30,7 +30,7 @@
 #include <pipe/p_format.h>
 #include "va_private.h"
 
-#define NUM_FORMAT_SUPPORTED 2
+#define NUM_FORMATS_SUPPORTED 2
 
 typedef struct  {
 	enum pipe_format;
@@ -38,7 +38,7 @@ typedef struct  {
     unsigned int        va_flags;
 } va_subpicture_formats_supported_t;
 
-static const va_subpicture_formats_supported_t va_subpicture_formats_supported[NUM_FORMAT_SUPPORTED] = 
+static const va_subpicture_formats_supported_t va_subpicture_formats_supported[NUM_FORMATS_SUPPORTED] = 
 {
 	{ PIPE_FORMAT_B8G8R8A8_UNORM,
       { VA_FOURCC('B','G','R','A'), VA_LSB_FIRST, 32,
@@ -64,7 +64,7 @@ vlVaQuerySubpictureFormats(		VADriverContextP ctx,
 		
 	int n = 0;
 	/* Query supported formats */
-	for (n = 0; n < NUM_FORMAT_SUPPORTED; n++)
+	for (n = 0; n < NUM_FORMATS_SUPPORTED; n++)
 	{
 		flags[n] = va_subpicture_formats_supported[n].va_flags;
 		format_list[n] = va_subpicture_formats_supported[n].va_format;
diff --git a/src/gallium/targets/Makefile.va b/src/gallium/targets/Makefile.va
new file mode 100644
index 00000000000..efb0a59522a
--- /dev/null
+++ b/src/gallium/targets/Makefile.va
@@ -0,0 +1,62 @@
+# This makefile template is used to build "driver"_drv_video.so
+
+LIBNAME = lib$(LIBBASENAME).so
+VA_LIB_GLOB= lib$(LIBBASENAME).*so*
+VA_MAJOR = 0
+VA_MINOR = 3
+INCLUDES = -I$(TOP)/src/gallium/include \
+	   -I$(TOP)/src/gallium/drivers \
+	   -I$(TOP)/src/gallium/auxiliary \
+	   -I$(TOP)/src/gallium/winsys \
+	   -I$(TOP)/src/gallium/winsys/g3dvl \
+	   $(DRIVER_INCLUDES)
+DEFINES = -DGALLIUM_TRACE -DVER_MAJOR=$(VA_MAJOR) -DVER_MINOR=$(VA_MINOR) $(DRIVER_DEFINES)
+LIBS = $(EXTRA_LIB_PATH) $(DRIVER_LIBS) -lva -lXext -lX11 -lm
+STATE_TRACKER_LIB = $(TOP)/src/gallium/state_trackers/va/libvatracker.a
+
+# XXX: Hack, VA public funcs aren't exported
+OBJECTS = $(C_SOURCES:.c=.o) \
+	  $(ASM_SOURCES:.S=.o) \
+	  $(TOP)/src/gallium/state_trackers/va/*.o
+
+##### RULES #####
+
+.c.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+.S.o:
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $< -o $@
+
+##### TARGETS #####
+
+default: depend symlinks $(TOP)/$(LIB_DIR)/gallium/$(LIBNAME)
+
+$(TOP)/$(LIB_DIR)/gallium/$(LIBNAME): $(OBJECTS) $(PIPE_DRIVERS) $(STATE_TRACKER_LIB) $(TOP)/$(LIB_DIR)/gallium Makefile
+	$(MKLIB) -o $(LIBBASENAME) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+		-major $(VA_MAJOR) -minor $(VA_MINOR) $(MKLIB_OPTIONS) \
+		-install $(TOP)/$(LIB_DIR)/gallium \
+		$(OBJECTS) $(STATE_TRACKER_LIB) $(PIPE_DRIVERS) $(LIBS)
+
+$(TOP)/$(LIB_DIR)/gallium:
+	mkdir -p $@
+
+depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
+	rm -f depend
+	touch depend
+	$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDES) $(C_SOURCES) \
+		$(ASM_SOURCES) 2> /dev/null
+
+# Emacs tags
+tags:
+	etags `find . -name \*.[ch]` `find ../include`
+
+# Remove objects, backup files, built .so files, symlinks and dependency files
+clean:
+	-rm -f *.o *~ *.so $(SYMLINKS)
+	-rm -f depend depend.bak
+
+install: default
+	$(INSTALL) -d $(DESTDIR)$(VA_LIB_INSTALL_DIR)
+	$(MINSTALL) -m 755 $(TOP)/$(LIB_DIR)/gallium/$(VA_LIB_GLOB) $(DESTDIR)$(VA_LIB_INSTALL_DIR)
+
+include depend
diff --git a/src/gallium/targets/va-softpipe/Makefile b/src/gallium/targets/va-softpipe/Makefile
new file mode 100644
index 00000000000..a58df36a966
--- /dev/null
+++ b/src/gallium/targets/va-softpipe/Makefile
@@ -0,0 +1,21 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+# Base name of the library this target produces (presumably consumed by ../Makefile.va).
+LIBBASENAME = softpipe_drv_video
+
+DRIVER_DEFINES = -DGALLIUM_SOFTPIPE
+DRIVER_INCLUDES =
+# Static archives for the xlib software winsys, the softpipe driver and the auxiliary code.
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/winsys/sw/xlib/libws_xlib.a \
+	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+	$(TOP)/src/gallium/auxiliary/libgallium.a
+
+C_SOURCES = \
+	$(TOP)/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+
+DRIVER_LIBS =
+
+include ../Makefile.va
+# "symlinks" has no recipe here; presumably ../Makefile.va expects the target to exist.
+symlinks:
-- 
cgit v1.2.3


From 8ba4c96f8204003ff0d5247d71c0855827810560 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Thu, 28 Oct 2010 14:32:54 +0200
Subject: vl: rest of va stubs

---
 src/gallium/state_trackers/va/Makefile     |  3 ++-
 src/gallium/state_trackers/va/va_config.c  |  5 +++--
 src/gallium/state_trackers/va/va_context.c |  1 -
 src/gallium/state_trackers/va/va_image.c   |  4 ++--
 src/gallium/targets/va-r600/Makefile       | 26 ++++++++++++++++++++++++++
 src/gallium/targets/va-r600/target.c       | 24 ++++++++++++++++++++++++
 6 files changed, 57 insertions(+), 6 deletions(-)
 create mode 100644 src/gallium/targets/va-r600/Makefile
 create mode 100644 src/gallium/targets/va-r600/target.c

diff --git a/src/gallium/state_trackers/va/Makefile b/src/gallium/state_trackers/va/Makefile
index 1e22bb50d1d..dd303ebace9 100644
--- a/src/gallium/state_trackers/va/Makefile
+++ b/src/gallium/state_trackers/va/Makefile
@@ -19,7 +19,8 @@ C_SOURCES = htab.c \
 	    va_buffer.c \
 	    va_config.c \
             va_picture.c \
-            va_surface.c
+            va_surface.c \
+	    va_display.c
 	    
 
diff --git a/src/gallium/state_trackers/va/va_config.c b/src/gallium/state_trackers/va/va_config.c
index 5756c2f155d..591d113a916 100644
--- a/src/gallium/state_trackers/va/va_config.c
+++ b/src/gallium/state_trackers/va/va_config.c
@@ -29,7 +29,7 @@
 #include <va/va_backend.h>
 #include "va_private.h"
 
-VAStatus vlVaConfigProfiles(       VADriverContextP ctx,
+VAStatus vlVaQueryConfigProfiles(       VADriverContextP ctx,
                                    VAProfile *profile_list,
                                    int *num_profiles)
 {
@@ -40,7 +40,7 @@ VAStatus vlVaConfigProfiles(       VADriverContextP ctx,
 }
 
 
-VAStatus vlVaConfigEntrypoints(       VADriverContextP ctx,
+VAStatus vlVaQueryConfigEntrypoints(       VADriverContextP ctx,
                                       VAProfile profile,
                                       VAEntrypoint *entypoint_list,
                                       int *num_entrypoints)
@@ -98,3 +98,4 @@ VAStatus vlVaQueryConfigAttributes(       VADriverContextP ctx,
 
 	return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
+
diff --git a/src/gallium/state_trackers/va/va_context.c b/src/gallium/state_trackers/va/va_context.c
index 7ef84606305..1e3ab9cb22e 100644
--- a/src/gallium/state_trackers/va/va_context.c
+++ b/src/gallium/state_trackers/va/va_context.c
@@ -84,6 +84,5 @@ VAStatus vlVaTerminate(       VADriverContextP ctx)
 {
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
-
 	return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
diff --git a/src/gallium/state_trackers/va/va_image.c b/src/gallium/state_trackers/va/va_image.c
index b1f990a15eb..40a96d3ea48 100644
--- a/src/gallium/state_trackers/va/va_image.c
+++ b/src/gallium/state_trackers/va/va_image.c
@@ -66,7 +66,7 @@ VAStatus vlVaDeriveImage(	VADriverContextP ctx,
 	return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlDestroyImage(	VADriverContextP ctx,
+VAStatus vlVaDestroyImage(	VADriverContextP ctx,
                             VAImageID image)
 {
 	if (!ctx)
@@ -76,7 +76,7 @@ VAStatus vlDestroyImage(	VADriverContextP ctx,
 	return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlSetImagePalette(	VADriverContextP ctx,
+VAStatus vlVaSetImagePalette(	VADriverContextP ctx,
                             VAImageID image,
                             unsigned char *palette)
 {
diff --git a/src/gallium/targets/va-r600/Makefile b/src/gallium/targets/va-r600/Makefile
new file mode 100644
index 00000000000..03ca8edaf25
--- /dev/null
+++ b/src/gallium/targets/va-r600/Makefile
@@ -0,0 +1,26 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+# Base name of the library this target produces (presumably consumed by ../Makefile.va).
+LIBBASENAME = r600_drv_video
+# NOTE(review): same define as the va-softpipe target although this one builds r600 -- confirm it is intended.
+DRIVER_DEFINES = -DGALLIUM_SOFTPIPE
+DRIVER_INCLUDES =
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/drivers/r600/libr600.a \
+	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
+        $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+        $(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/auxiliary/libgallium.a
+
+C_SOURCES = \
+	target.c \
+	$(COMMON_GALLIUM_SOURCES) \
+	$(DRIVER_SOURCES)
+# Extra link flags: whatever pkg-config reports for libdrm_radeon, plus libXfixes.
+DRIVER_LIBS = $(shell pkg-config libdrm_radeon --libs) -lXfixes
+
+include ../Makefile.va
+# "symlinks" has no recipe here; presumably ../Makefile.va expects the target to exist.
+symlinks:
diff --git a/src/gallium/targets/va-r600/target.c b/src/gallium/targets/va-r600/target.c
new file mode 100644
index 00000000000..8753e2bab17
--- /dev/null
+++ b/src/gallium/targets/va-r600/target.c
@@ -0,0 +1,24 @@
+#include "state_tracker/drm_driver.h"
+#include "target-helpers/inline_debug_helper.h"
+#include "r600/drm/r600_drm_public.h"
+#include "r600/r600_public.h"
+
+static struct pipe_screen *create_screen(int fd)
+{
+   struct radeon *radeon;
+   struct pipe_screen *screen;
+   /* Create the r600 DRM winsys object for fd; NULL means failure. */
+   radeon = r600_drm_winsys_create(fd);
+   if (!radeon)
+      return NULL;
+   /* Build the pipe screen on top of the winsys object. */
+   screen = r600_screen_create(radeon);
+   if (!screen)
+      return NULL; /* NOTE(review): radeon is not released on this path -- confirm who owns it */
+   /* Pass the screen through debug_screen_wrap() and return whatever it hands back. */
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen)
-- 
cgit v1.2.3


From 2b296ec77c2b95e7632b45100de5a0878ac2a294 Mon Sep 17 00:00:00 2001
From: Thomas Balling Sørensen <tball@tball-laptop.(none)>
Date: Thu, 28 Oct 2010 22:46:28 +0200
Subject: vl: initial implementation of vlVaQueryImageFormats(),
 vlVaCreateImage(), vlVaQuerySubpictureFormats(), vlVaCreateSurfaces(),
 vlVaQueryConfigEntrypoints(), vlVaQueryConfigProfiles()

---
 src/gallium/state_trackers/va/ftab.c          |  4 +-
 src/gallium/state_trackers/va/htab.c          |  7 +--
 src/gallium/state_trackers/va/va_config.c     | 38 ++++++++++++++--
 src/gallium/state_trackers/va/va_context.c    | 27 ++++++++++--
 src/gallium/state_trackers/va/va_display.c    |  6 ++-
 src/gallium/state_trackers/va/va_image.c      | 62 +++++++++++++++++++++++++--
 src/gallium/state_trackers/va/va_picture.c    |  1 +
 src/gallium/state_trackers/va/va_private.h    | 24 +++++++++++
 src/gallium/state_trackers/va/va_subpicture.c | 20 ++++-----
 src/gallium/state_trackers/va/va_surface.c    | 49 ++++++++++++++++++++-
 10 files changed, 211 insertions(+), 27 deletions(-)

diff --git a/src/gallium/state_trackers/va/ftab.c b/src/gallium/state_trackers/va/ftab.c
index a567eee9dd1..999287e7a7e 100644
--- a/src/gallium/state_trackers/va/ftab.c
+++ b/src/gallium/state_trackers/va/ftab.c
@@ -30,6 +30,8 @@
 #include <va/va_backend.h>
 #include "va_private.h"
 
+struct VADriverVTable vlVaGetVtable();
+
 static struct VADriverVTable vtable =
 {
 	&vlVaTerminate, /* VAStatus (*vaTerminate) ( VADriverContextP ctx ); */
@@ -125,7 +127,7 @@ static struct VADriverVTable vtable =
                 unsigned int *buffer_name,
                 void **buffer); */
 	&vlVaUnlockSurface, /* VAStatus (*vaUnlockSurface) (VADriverContextP ctx,VASurfaceID surface); */
-	0x44 /* struct VADriverVTableGLX *glx; "Optional" */
+	NULL /* struct VADriverVTableGLX *glx; "Optional" */
 };
 
 struct VADriverVTable vlVaGetVtable()
diff --git a/src/gallium/state_trackers/va/htab.c b/src/gallium/state_trackers/va/htab.c
index 069c7930927..2187507c6a4 100644
--- a/src/gallium/state_trackers/va/htab.c
+++ b/src/gallium/state_trackers/va/htab.c
@@ -29,9 +29,10 @@
 #include <os/os_thread.h>
 #include "va_private.h"
 
-#define VL_HANDLES
-
-typedef uint32_t vlHandle;
+boolean vlCreateHTAB(void);
+void vlDestroyHTAB(void);
+vlHandle vlAddDataHTAB(void *data);
+void* vlGetDataHTAB(vlHandle handle);
 
 #ifdef VL_HANDLES
 static struct handle_table *htab = NULL;
diff --git a/src/gallium/state_trackers/va/va_config.c b/src/gallium/state_trackers/va/va_config.c
index 591d113a916..1589abf7cfa 100644
--- a/src/gallium/state_trackers/va/va_config.c
+++ b/src/gallium/state_trackers/va/va_config.c
@@ -27,6 +27,7 @@
 
 #include <va/va.h>
 #include <va/va_backend.h>
+#include <util/u_debug.h>
 #include "va_private.h"
 
 VAStatus vlVaQueryConfigProfiles(       VADriverContextP ctx,
@@ -36,19 +37,48 @@ VAStatus vlVaQueryConfigProfiles(       VADriverContextP ctx,
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+	int i = 0;
+
+    profile_list[i++] = VAProfileMPEG2Simple;
+	*num_profiles = i;
+
+	return VA_STATUS_SUCCESS;
 }
 
 
 VAStatus vlVaQueryConfigEntrypoints(       VADriverContextP ctx,
                                       VAProfile profile,
-                                      VAEntrypoint *entypoint_list,
+                                      VAEntrypoint *entrypoint_list,
                                       int *num_entrypoints)
 {
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
-
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+		
+	VAStatus vaStatus = VA_STATUS_SUCCESS;
+
+    switch (profile) {
+    case VAProfileMPEG2Simple:
+    case VAProfileMPEG2Main:
+		VA_INFO("Using profile %08x\n",profile);
+        *num_entrypoints = 1;
+        entrypoint_list[0] = VAEntrypointMoComp;
+        break;
+
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+        *num_entrypoints = 0;
+        break;
+
+    default:
+		VA_ERROR("Unsupported profile %08x\n",profile);
+        vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+        *num_entrypoints = 0;
+        break;
+    }
+
+    return vaStatus;
 }
 
 
diff --git a/src/gallium/state_trackers/va/va_context.c b/src/gallium/state_trackers/va/va_context.c
index 1e3ab9cb22e..cdb20cc0eb2 100644
--- a/src/gallium/state_trackers/va/va_context.c
+++ b/src/gallium/state_trackers/va/va_context.c
@@ -27,7 +27,10 @@
 
 #include <pipe/p_compiler.h>
 #include <pipe/p_video_context.h>
+#include <pipe/p_screen.h>
+#include <vl_winsys.h>
 #include <util/u_debug.h>
+#include <util/u_memory.h>
 #include <va/va.h>
 #include <va/va_backend.h>
 #include "va_private.h"
@@ -37,19 +40,35 @@
 PUBLIC
 VAStatus __vaDriverInit_0_31 (VADriverContextP ctx)
 {
+	vlVaDriverContextPriv *driver_context = NULL;
+	
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
-
+		
+		
+	/* Create private driver context */
+	driver_context = CALLOC(1,sizeof(vlVaDriverContextPriv));
+	if (!driver_context)
+		return VA_STATUS_ERROR_ALLOCATION_FAILED;
+		
+    driver_context->vscreen = vl_screen_create(ctx->native_dpy, ctx->x11_screen);
+	if (!driver_context->vscreen)
+	{
+		FREE(driver_context);
+		return VA_STATUS_ERROR_ALLOCATION_FAILED;
+	}
+		
 	ctx->str_vendor = "mesa gallium vaapi";
 	ctx->vtable = vlVaGetVtable();
 	ctx->max_attributes = 1;
 	ctx->max_display_attributes = 1;
-	ctx->max_entrypoints = 1;
-	ctx->max_image_formats = 1;
+	ctx->max_entrypoints = VA_MAX_ENTRYPOINTS;
+	ctx->max_image_formats = VA_MAX_IMAGE_FORMATS_SUPPORTED;
 	ctx->max_profiles = 1;
-	ctx->max_subpic_formats = 1;
+	ctx->max_subpic_formats = VA_MAX_SUBPIC_FORMATS_SUPPORTED;
 	ctx->version_major = 3;
 	ctx->version_minor = 1;
+	ctx->pDriverData = (void *)driver_context;
 
 	VA_INFO("vl_screen_pointer %p\n",ctx->native_dpy);
 
diff --git a/src/gallium/state_trackers/va/va_display.c b/src/gallium/state_trackers/va/va_display.c
index d50d712d4e0..1aaaf7ccc53 100644
--- a/src/gallium/state_trackers/va/va_display.c
+++ b/src/gallium/state_trackers/va/va_display.c
@@ -37,8 +37,12 @@ VAStatus  vlVaQueryDisplayAttributes(		VADriverContextP ctx,
         if (!ctx)
                 return VA_STATUS_ERROR_INVALID_CONTEXT;
 
+		if (!(attr_list && num_attributes))
+			return VA_STATUS_ERROR_UNKNOWN;
 
-        return VA_STATUS_ERROR_UNIMPLEMENTED;
+        *num_attributes = 0;
+
+		return VA_STATUS_SUCCESS;
 }
 
 VAStatus  vlVaGetDisplayAttributes(		VADriverContextP ctx,
diff --git a/src/gallium/state_trackers/va/va_image.c b/src/gallium/state_trackers/va/va_image.c
index 40a96d3ea48..8d20bfa9174 100644
--- a/src/gallium/state_trackers/va/va_image.c
+++ b/src/gallium/state_trackers/va/va_image.c
@@ -27,10 +27,30 @@
 
 #include <util/u_memory.h>
 #include <util/u_format.h>
+#include <util/u_debug.h>
+#include <pipe/p_format.h>
 #include <va/va.h>
 #include <va/va_backend.h>
 #include "va_private.h"
 
+typedef struct  {
+	enum pipe_format pipe_format;
+	VAImageFormat       va_format;
+} va_image_formats_supported_t;
+/* Image formats copied out by vlVaQueryImageFormats(), each paired with a gallium pipe_format. */
+static const va_image_formats_supported_t va_image_formats_supported[VA_MAX_IMAGE_FORMATS_SUPPORTED] = 
+{
+	{ PIPE_FORMAT_B8G8R8A8_UNORM,
+      { VA_FOURCC('B','G','R','A'), VA_LSB_FIRST, 32, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 }},
+    { PIPE_FORMAT_R8G8B8A8_UNORM, 
+	  { VA_FOURCC_RGBA, VA_LSB_FIRST, 32, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 }}
+};
+
+boolean vlCreateHTAB(void);
+void vlDestroyHTAB(void);
+vlHandle vlAddDataHTAB(void *data);
+void* vlGetDataHTAB(vlHandle handle);
+
 VAStatus
 vlVaQueryImageFormats ( 	VADriverContextP ctx,
                             VAImageFormat *format_list,
@@ -39,8 +59,20 @@ vlVaQueryImageFormats ( 	VADriverContextP ctx,
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+	if (!(format_list && num_formats))
+		return VA_STATUS_ERROR_UNKNOWN;
+		
+	int n = 0;
+	
+	num_formats[0] = VA_MAX_IMAGE_FORMATS_SUPPORTED;
+	
+	/* Query supported formats */
+	for (n = 0; n < VA_MAX_IMAGE_FORMATS_SUPPORTED; n++)
+	{
+		format_list[n] = va_image_formats_supported[n].va_format;
+	}
+
+	return VA_STATUS_SUCCESS;
 }
 
 VAStatus vlVaCreateImage(	VADriverContextP ctx,
@@ -52,7 +84,31 @@ VAStatus vlVaCreateImage(	VADriverContextP ctx,
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+	if(!format)
+		return VA_STATUS_ERROR_UNKNOWN;
+	/* A zero width or height is rejected. */
+	if (!(width && height))
+		return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
+	/* Give up if vlCreateHTAB() reports failure. */
+	if (!vlCreateHTAB())
+		return VA_STATUS_ERROR_UNKNOWN;
+	/* Only the BGRA and RGBA fourccs are accepted; anything else is logged and refused. */
+	switch (format->fourcc) {
+	case VA_FOURCC('B','G','R','A'):
+		VA_INFO("Creating BGRA image of size %dx%d\n",width,height);
+	break;
+	case VA_FOURCC_RGBA:
+		VA_INFO("Creating RGBA image of size %dx%d\n",width,height);
+	break;
+	default:
+		VA_ERROR("Couldn't create image of type %08x\n",format->fourcc); /* zero-padded 8-digit hex */
+		return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
+	break;
+	}
+	/* NOTE(review): the lines visible here only validate and log; confirm where the image itself is set up. */
+	VA_INFO("Image %p created successfully\n",format);
+	
+	return VA_STATUS_SUCCESS;
 }
 
 VAStatus vlVaDeriveImage(	VADriverContextP ctx,
diff --git a/src/gallium/state_trackers/va/va_picture.c b/src/gallium/state_trackers/va/va_picture.c
index cf7d844a780..3603dfb6fed 100644
--- a/src/gallium/state_trackers/va/va_picture.c
+++ b/src/gallium/state_trackers/va/va_picture.c
@@ -27,6 +27,7 @@
 
 #include <va/va.h>
 #include <va/va_backend.h>
+#include <util/u_debug.h>
 #include "va_private.h"
 
 VAStatus vlVaBeginPicture(       VADriverContextP ctx,
diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h
index bd037c89606..625c6cdbe1b 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -30,17 +30,41 @@
 
 #include <va/va.h>
 #include <va/va_backend.h>
+#include <pipe/p_format.h>
+#include <pipe/p_state.h>
+
 #define VA_DEBUG(_str,...) debug_printf("[Gallium VA backend]: " _str,__VA_ARGS__)
 #define VA_INFO(_str,...) VA_DEBUG("INFO: " _str,__VA_ARGS__)
 #define VA_WARNING(_str,...) VA_DEBUG("WARNING: " _str,__VA_ARGS__)
 #define VA_ERROR(_str,...) VA_DEBUG("ERROR: " _str,__VA_ARGS__)
 
+#define VA_MAX_IMAGE_FORMATS_SUPPORTED 2
+#define VA_MAX_SUBPIC_FORMATS_SUPPORTED 2
+#define VA_MAX_ENTRYPOINTS 1
+
+#define VL_HANDLES
+
+typedef unsigned int vlHandle;
+
+typedef struct {
+	struct vl_screen *vscreen; /* result of vl_screen_create() in __vaDriverInit_0_31 */
+	struct pipe_surface *backbuffer; /* NOTE(review): not assigned in the code visible in this patch */
+} vlVaDriverContextPriv;
+
+typedef struct {
+	unsigned int width; /* width passed to vlVaCreateSurfaces() */
+	unsigned int height; /* height passed to vlVaCreateSurfaces() */
+	enum pipe_video_chroma_format format; /* value returned by VaRTFormatToPipe() */
+	VADriverContextP ctx; /* driver context the surface was created with */
+} vlVaSurfacePriv;
+
 // Public functions:
 VAStatus __vaDriverInit_0_31 (VADriverContextP ctx);
 
 // Private functions:
 struct VADriverVTable vlVaGetVtable();
 
+
 // Vtable functions:
 VAStatus vlVaTerminate (VADriverContextP ctx);
 VAStatus vlVaQueryConfigProfiles (VADriverContextP ctx, VAProfile *profile_list,int *num_profiles);
diff --git a/src/gallium/state_trackers/va/va_subpicture.c b/src/gallium/state_trackers/va/va_subpicture.c
index a6d2960e7e5..910e5bd7b70 100644
--- a/src/gallium/state_trackers/va/va_subpicture.c
+++ b/src/gallium/state_trackers/va/va_subpicture.c
@@ -30,23 +30,20 @@
 #include <pipe/p_format.h>
 #include "va_private.h"
 
-#define NUM_FORMATS_SUPPORTED 2
 
 typedef struct  {
-	enum pipe_format;
+	enum pipe_format	pipe_format;
 	VAImageFormat       va_format;
     unsigned int        va_flags;
 } va_subpicture_formats_supported_t;
 
-static const va_subpicture_formats_supported_t va_subpicture_formats_supported[NUM_FORMATS_SUPPORTED] = 
+static const va_subpicture_formats_supported_t va_subpicture_formats_supported[VA_MAX_SUBPIC_FORMATS_SUPPORTED + 1] = 
 {
 	{ PIPE_FORMAT_B8G8R8A8_UNORM,
-      { VA_FOURCC('B','G','R','A'), VA_LSB_FIRST, 32,
-        32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 },
+      { VA_FOURCC('B','G','R','A'), VA_LSB_FIRST, 32, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 },
       0 },
     { PIPE_FORMAT_R8G8B8A8_UNORM, 
-	  { VA_FOURCC('R','G','B','A'), VA_LSB_FIRST, 32,
-        32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 },
+	  { VA_FOURCC_RGBA, VA_LSB_FIRST, 32, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 },
       0 }
 };
 
@@ -62,12 +59,15 @@ vlVaQuerySubpictureFormats(		VADriverContextP ctx,
 	if (!(format_list && flags && num_formats))
 		return VA_STATUS_ERROR_UNKNOWN;
 		
+	num_formats[0] = VA_MAX_SUBPIC_FORMATS_SUPPORTED;
+		
 	int n = 0;
 	/* Query supported formats */
-	for (n = 0; n < NUM_FORMATS_SUPPORTED; n++)
+	for (n = 0; n < VA_MAX_SUBPIC_FORMATS_SUPPORTED ; n++)
 	{
-		flags[n] = va_subpicture_formats_supported[n].va_flags;
-		format_list[n] = va_subpicture_formats_supported[n].va_format;
+		const va_subpicture_formats_supported_t * const format_map = &va_subpicture_formats_supported[n];
+		flags[n] = format_map->va_flags;
+		format_list[n] = format_map->va_format;
 	}
 
 	return VA_STATUS_SUCCESS;
diff --git a/src/gallium/state_trackers/va/va_surface.c b/src/gallium/state_trackers/va/va_surface.c
index ad241adaf41..a86c806248a 100644
--- a/src/gallium/state_trackers/va/va_surface.c
+++ b/src/gallium/state_trackers/va/va_surface.c
@@ -27,8 +27,31 @@
 
 #include <va/va.h>
 #include <va/va_backend.h>
+#include <util/u_debug.h>
+#include <util/u_memory.h>
 #include "va_private.h"
 
+boolean vlCreateHTAB(void);
+void vlDestroyHTAB(void);
+vlHandle vlAddDataHTAB(void *data);
+void* vlGetDataHTAB(vlHandle handle);
+
+static enum pipe_video_chroma_format VaRTFormatToPipe(unsigned int va_type)
+{
+   switch (va_type) {
+      case VA_RT_FORMAT_YUV420:
+         return PIPE_VIDEO_CHROMA_FORMAT_420;
+      case VA_RT_FORMAT_YUV422:
+         return PIPE_VIDEO_CHROMA_FORMAT_422;
+      case VA_RT_FORMAT_YUV444:
+         return PIPE_VIDEO_CHROMA_FORMAT_444;
+      default:
+         assert(0); /* any other va_type value is not handled */
+   }
+   /* Only reached from the default case, and only if the assert above does not stop execution. */
+   return -1;
+}
+
 VAStatus vlVaCreateSurfaces(       VADriverContextP ctx,
                                    int width,
                                    int height,
@@ -39,7 +62,31 @@ VAStatus vlVaCreateSurfaces(       VADriverContextP ctx,
 	if (!ctx)
 		return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+    /* We only support one format */
+    if (VA_RT_FORMAT_YUV420 != format)
+        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
+	/* A zero width or height is rejected. */
+	if (!(width && height))
+		return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
+	/* Give up if vlCreateHTAB() reports failure. */
+	if (!vlCreateHTAB())
+		return VA_STATUS_ERROR_UNKNOWN;
+	/* One private record per requested surface, allocated as a single array. */
+	vlVaSurfacePriv *va_surface = (vlVaSurfacePriv *)CALLOC(num_surfaces,sizeof(vlVaSurfacePriv));
+	if (!va_surface)
+		return VA_STATUS_ERROR_ALLOCATION_FAILED;
+	/* Fill in each record; the vlHandle from vlAddDataHTAB() is stored as the surface id (an id, not a pointer). */
+	int n = 0;
+	for (n = 0; n < num_surfaces; n++)
+	{
+		va_surface[n].width = width;
+		va_surface[n].height = height;
+		va_surface[n].format = VaRTFormatToPipe(format);
+		va_surface[n].ctx = ctx;
+		surfaces[n] = (VASurfaceID)vlAddDataHTAB((void *)(va_surface + n));
+	}
+
+	return VA_STATUS_SUCCESS;
 }
 
 VAStatus vlVaDestroySurfaces(       VADriverContextP ctx,
-- 
cgit v1.2.3


From 0b75203c5962475a9cbe27e31373750465f9d949 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 30 Oct 2010 01:42:16 +0200
Subject: First try of field based mc

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 209 ++++++++++++++++++++---
 1 file changed, 190 insertions(+), 19 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 977c8d67b55..0973d5e2fc4 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -198,13 +198,45 @@ create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
    return true;
 }
 
-#if 0
-static void
+static bool
 create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
-   assert(false);
+   struct ureg_program *shader;
+   struct ureg_src vpos, vtex[5];
+   struct ureg_dst o_vpos, o_vtex[5];
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return false;
+
+   vpos = ureg_DECL_vs_input(shader, 0);
+   for (i = 0; i < 5; ++i)
+      vtex[i] = ureg_DECL_vs_input(shader, i + 1);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   for (i = 0; i < 5; ++i)
+      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
+
+   /*
+    * o_vpos = vpos
+    * o_vtex[0..2] = vtex[0..2]
+    * o_vtex[3] = vpos + vtex[3] // Apply motion vector
+    * o_vtex[4] = vpos + vtex[4] // Apply motion vector
+    */
+   ureg_MOV(shader, o_vpos, vpos);
+   for (i = 0; i < 3; ++i)
+      ureg_MOV(shader, o_vtex[i], vtex[i]);
+   ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
+   ureg_ADD(shader, o_vtex[4], vpos, vtex[4]);
+
+   ureg_END(shader);
+
+   r->p_vs[1] = ureg_create_shader_and_destroy(shader, r->pipe);
+   if (!r->p_vs[1])
+      return false;
+
+   return true;
 }
-#endif
 
 static bool
 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
@@ -254,13 +286,67 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    return true;
 }
 
-#if 0
-static void
+static bool
 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
-   assert(false);
+   struct ureg_program *shader;
+   struct ureg_src tc[5];
+   struct ureg_src sampler[4];
+   struct ureg_dst texel, ref, tmp;
+   struct ureg_dst fragment;
+   unsigned i, label;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return false;
+
+   for (i = 0; i < 5; ++i)
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+   for (i = 0; i < 4; ++i)
+      sampler[i] = ureg_DECL_sampler(shader, i);
+
+   texel = ureg_DECL_temporary(shader);
+   ref = ureg_DECL_temporary(shader);
+   tmp = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * texel.r = tex(tc[0], sampler[0])
+    * texel.g = tex(tc[1], sampler[1])
+    * texel.b = tex(tc[2], sampler[2])
+    * ref = tex(tc[3], sampler[3])
+    * fragment = texel * scale + ref
+    */
+   for (i = 0; i < 3; ++i) {
+      /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
+      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
+      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
+   }
+
+   /*
+   ureg_MOD(shader, tmp, ureg_scalar(tc[4], TGSI_SWIZZLE_Y), ureg_scalar(ureg_imm1f(shader, 2), TGSI_SWIZZLE_Y));
+   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
+   ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
+   ureg_ELSE(shader, &label);
+   ureg_MOV(shader, ref, ureg_scalar(ureg_imm1f(shader, 0xFF), TGSI_SWIZZLE_X));
+   ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[4], sampler[3]);
+   ureg_ENDIF(shader);
+   */
+
+   ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[4], sampler[3]);
+   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
+
+   ureg_release_temporary(shader, tmp);
+   ureg_release_temporary(shader, texel);
+   ureg_release_temporary(shader, ref);
+   ureg_END(shader);
+
+   r->p_fs[1] = ureg_create_shader_and_destroy(shader, r->pipe);
+   if (!r->p_fs[1])
+      return false;
+
+   return true;
 }
-#endif
 
 static bool
 create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
@@ -304,13 +390,44 @@ create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
    return true;
 }
 
-#if 0
-static void
+static bool
 create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
-   assert(false);
+   struct ureg_program *shader;
+   struct ureg_src vpos, vtex[7];
+   struct ureg_dst o_vpos, o_vtex[7];
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return false;
+
+   vpos = ureg_DECL_vs_input(shader, 0);
+   for (i = 0; i < 7; ++i)
+      vtex[i] = ureg_DECL_vs_input(shader, i + 1);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   for (i = 0; i < 7; ++i)
+      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
+
+   /*
+    * o_vpos = vpos
+    * o_vtex[0..2] = vtex[0..2]
+    * o_vtex[3..6] = vpos + vtex[3..6] // Apply motion vector
+    */
+   ureg_MOV(shader, o_vpos, vpos);
+   for (i = 0; i < 3; ++i)
+      ureg_MOV(shader, o_vtex[i], vtex[i]);
+   for (i = 3; i < 7; ++i)
+      ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
+
+   ureg_END(shader);
+
+   r->b_vs[1] = ureg_create_shader_and_destroy(shader, r->pipe);
+   if (!r->b_vs[1])
+      return false;
+
+   return true;
 }
-#endif
 
 static bool
 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
@@ -366,13 +483,59 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    return true;
 }
 
-#if 0
-static void
+static bool
 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
-   assert(false);
+   struct ureg_program *shader;
+   struct ureg_src tc[5];
+   struct ureg_src sampler[5];
+   struct ureg_dst texel, ref[2];
+   struct ureg_dst fragment;
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return false;
+
+   for (i = 0; i < 5; ++i)  {
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+      sampler[i] = ureg_DECL_sampler(shader, i);
+   }
+   texel = ureg_DECL_temporary(shader);
+   ref[0] = ureg_DECL_temporary(shader);
+   ref[1] = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * texel.r = tex(tc[0], sampler[0])
+    * texel.g = tex(tc[1], sampler[1])
+    * texel.b = tex(tc[2], sampler[2])
+    * ref[0..1] = tex(tc[3..4], sampler[3..4])
+    * ref[0] = lerp(ref[0], ref[1], 0.5)
+    * fragment = texel * scale + ref[0]
+    */
+   for (i = 0; i < 3; ++i) {
+      /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
+      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
+      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
+   }
+   ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
+   ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
+   ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
+
+   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
+
+   ureg_release_temporary(shader, texel);
+   ureg_release_temporary(shader, ref[0]);
+   ureg_release_temporary(shader, ref[1]);
+   ureg_END(shader);
+
+   r->b_fs[1] = ureg_create_shader_and_destroy(shader, r->pipe);
+   if (!r->b_fs[1])
+      return false;
+
+   return true;
 }
-#endif
 
 static void
 xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
@@ -500,9 +663,13 @@ init_shaders(struct vl_mpeg12_mc_renderer *r)
    create_intra_vert_shader(r);
    create_intra_frag_shader(r);
    create_frame_pred_vert_shader(r);
+   create_field_pred_vert_shader(r);
    create_frame_pred_frag_shader(r);
+   create_field_pred_frag_shader(r);
    create_frame_bi_pred_vert_shader(r);
+   create_field_bi_pred_vert_shader(r);
    create_frame_bi_pred_frag_shader(r);
+   create_field_bi_pred_frag_shader(r);
 
    return true;
 }
@@ -515,9 +682,13 @@ cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
    r->pipe->delete_vs_state(r->pipe, r->i_vs);
    r->pipe->delete_fs_state(r->pipe, r->i_fs);
    r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
+   r->pipe->delete_vs_state(r->pipe, r->p_vs[1]);
    r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
+   r->pipe->delete_fs_state(r->pipe, r->p_fs[1]);
    r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
+   r->pipe->delete_vs_state(r->pipe, r->b_vs[1]);
    r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
+   r->pipe->delete_fs_state(r->pipe, r->b_fs[1]);
 }
 
 static bool
@@ -1099,7 +1270,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
       vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
    }
 
-   if (false /*num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0 */ ) {
+   if (num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->past->texture;
@@ -1129,7 +1300,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
    }
 
-   if (false /*num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0*/ ) {
+   if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->future->texture;
@@ -1161,7 +1332,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
       vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
    }
 
-   if (false /*num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0 */ ) {
+   if (num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
       r->textures.individual.ref[0] = r->past->texture;
-- 
cgit v1.2.3


From 2e4a7b7306a14808fc3143959dc3bf4d2e7f023b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 3 Nov 2010 00:38:07 +0100
Subject: Fix zero block handling for field based mc

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 65 ++++++++++++++----------
 1 file changed, 39 insertions(+), 26 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 0973d5e2fc4..ec89a116913 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -294,7 +294,7 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_src sampler[4];
    struct ureg_dst texel, ref, tmp;
    struct ureg_dst fragment;
-   unsigned i, label;
+   unsigned i;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -323,16 +323,6 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
    }
 
-   /*
-   ureg_MOD(shader, tmp, ureg_scalar(tc[4], TGSI_SWIZZLE_Y), ureg_scalar(ureg_imm1f(shader, 2), TGSI_SWIZZLE_Y));
-   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
-   ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
-   ureg_ELSE(shader, &label);
-   ureg_MOV(shader, ref, ureg_scalar(ureg_imm1f(shader, 0xFF), TGSI_SWIZZLE_X));
-   ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[4], sampler[3]);
-   ureg_ENDIF(shader);
-   */
-
    ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[4], sampler[3]);
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
 
@@ -886,11 +876,15 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
 }
 
 static void
-gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned mby,
+gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
                 const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
                 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
                 bool use_zeroblocks, struct vertex2f *zero_blocks)
 {
+   unsigned cbp = mb->cbp;
+   unsigned mbx = mb->mbx;
+   unsigned mby = mb->mby;
+
    struct vertex2f v;
 
    assert(vb);
@@ -918,7 +912,7 @@ gen_block_verts(struct vert_stream_0 *vb, unsigned cbp, unsigned mbx, unsigned m
       or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
       for this channel is defined for this block) */
 
-   if (!use_zeroblocks || cbp & luma_mask) {
+   if (!use_zeroblocks || cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
       v.x = mbx * unit->x + offset->x;
       v.y = mby * unit->y + offset->y;
    }
@@ -1104,19 +1098,19 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
 
          struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
 
-         gen_block_verts(vb, mb->cbp, mb->mbx, mb->mby,
+         gen_block_verts(vb, mb,
                          &unit, &half, &offsets[0][0],
                          32, 2, 1, use_zb, r->zero_block);
 
-         gen_block_verts(vb + 6, mb->cbp, mb->mbx, mb->mby,
+         gen_block_verts(vb + 6, mb,
                          &unit, &half, &offsets[1][0],
                          16, 2, 1, use_zb, r->zero_block);
 
-         gen_block_verts(vb + 12, mb->cbp, mb->mbx, mb->mby,
+         gen_block_verts(vb + 12, mb,
                          &unit, &half, &offsets[0][1],
                          8, 2, 1, use_zb, r->zero_block);
 
-         gen_block_verts(vb + 18, mb->cbp, mb->mbx, mb->mby,
+         gen_block_verts(vb + 18, mb,
                          &unit, &half, &offsets[1][1],
                          4, 2, 1, use_zb, r->zero_block);
 
@@ -1383,7 +1377,7 @@ grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
 }
 
 static void
-fill_zero_block(short *dst, unsigned dst_pitch)
+fill_frame_zero_block(short *dst, unsigned dst_pitch)
 {
    unsigned y;
 
@@ -1393,6 +1387,17 @@ fill_zero_block(short *dst, unsigned dst_pitch)
       memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
 }
 
+static void
+fill_field_zero_block(short *dst, unsigned dst_pitch)
+{
+   unsigned y;
+
+   assert(dst);
+
+   for (y = 0; y < BLOCK_HEIGHT; ++y)
+      memset(dst + y * dst_pitch * 2, 0, BLOCK_WIDTH * 2);
+}
+
 static void
 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
             enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
@@ -1414,7 +1419,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
          if ((cbp >> (5 - tb)) & 1) {
             if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
                grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
-                                      texels + y * tex_pitch * BLOCK_WIDTH +
+                                      texels + y * tex_pitch * BLOCK_HEIGHT +
                                       x * BLOCK_WIDTH, tex_pitch);
             }
             else {
@@ -1426,14 +1431,22 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
             ++sb;
          }
          else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
-            if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
-                ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
-               fill_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
-               if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
-                  r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
-                  r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
+            if(dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
+
+               if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
+                   ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
+
+                  fill_frame_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
+                  if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
+                     r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
+                     r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
+                  }
                }
             }
+            else {
+
+               fill_field_zero_block(texels + y * tex_pitch + x * BLOCK_WIDTH, tex_pitch);
+            }
          }
       }
    }
@@ -1455,7 +1468,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
       else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
          if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
              ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
-            fill_zero_block(texels, tex_pitch);
+            fill_frame_zero_block(texels, tex_pitch);
             if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
                r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
                r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
-- 
cgit v1.2.3


From b4c5c6f51ae5398c1be190233f7539f78885fa45 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 4 Nov 2010 21:41:39 +0100
Subject: [g3dvl] rework shader a bit

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 275 +++++------------------
 1 file changed, 56 insertions(+), 219 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index ec89a116913..fb9ee7c1a1d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -78,43 +78,57 @@ enum MACROBLOCK_TYPE
    NUM_MACROBLOCK_TYPES
 };
 
-static bool
-create_intra_vert_shader(struct vl_mpeg12_mc_renderer *r)
+static void *
+create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
 {
    struct ureg_program *shader;
-   struct ureg_src vpos, vtex[3];
-   struct ureg_dst o_vpos, o_vtex[3];
-   unsigned i;
+   struct ureg_src vpos, vtex[3], vmv[4];
+   struct ureg_dst o_vpos, o_vtex[3], o_vmv[4];
+   unsigned i, j, count;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
-      return false;
+      return NULL;
 
    vpos = ureg_DECL_vs_input(shader, 0);
-   for (i = 0; i < 3; ++i)
-      vtex[i] = ureg_DECL_vs_input(shader, i + 1);
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
-   for (i = 0; i < 3; ++i)
-      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
+
+   for (i = 0; i < 3; ++i) {
+      vtex[i] = ureg_DECL_vs_input(shader, 1 + i);
+      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1 + i);
+   }
+   
+   count=0;
+   for (i = 0; i < ref_frames; ++i) {
+      for (j = 0; j < 2; ++j) {        
+        if(j < mv_per_frame) {
+           vmv[count] = ureg_DECL_vs_input(shader, 4 + i * 2 + j);
+           o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
+           count++;
+        }
+        /* workaround for r600g */
+        else if(ref_frames == 2)
+           ureg_DECL_vs_input(shader, 4 + i * 2 + j);
+      }
+   }
 
    /*
     * o_vpos = vpos
     * o_vtex[0..2] = vtex[0..2]
+    * o_vmv[0..count] = vpos + vmv[0..4] // Apply motion vector
     */
    ureg_MOV(shader, o_vpos, vpos);
    for (i = 0; i < 3; ++i)
       ureg_MOV(shader, o_vtex[i], vtex[i]);
+   for (i = 0; i < count; ++i)
+      ureg_ADD(shader, o_vmv[i], vpos, vmv[i]);
 
    ureg_END(shader);
 
-   r->i_vs = ureg_create_shader_and_destroy(shader, r->pipe);
-   if (!r->i_vs)
-      return false;
-
-   return true;
+   return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static bool
+static void *
 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
@@ -126,7 +140,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
-      return false;
+      return NULL;
 
    for (i = 0; i < 3; ++i)  {
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
@@ -153,92 +167,10 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_release_temporary(shader, temp);
    ureg_END(shader);
 
-   r->i_fs = ureg_create_shader_and_destroy(shader, r->pipe);
-   if (!r->i_fs)
-      return false;
-
-   return true;
-}
-
-static bool
-create_frame_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
-{
-   struct ureg_program *shader;
-   struct ureg_src vpos, vtex[4];
-   struct ureg_dst o_vpos, o_vtex[4];
-   unsigned i;
-
-   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
-   if (!shader)
-      return false;
-
-   vpos = ureg_DECL_vs_input(shader, 0);
-   for (i = 0; i < 4; ++i)
-      vtex[i] = ureg_DECL_vs_input(shader, i + 1);
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
-   for (i = 0; i < 4; ++i)
-      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
-
-   /*
-    * o_vpos = vpos
-    * o_vtex[0..2] = vtex[0..2]
-    * o_vtex[3] = vpos + vtex[3] // Apply motion vector
-    */
-   ureg_MOV(shader, o_vpos, vpos);
-   for (i = 0; i < 3; ++i)
-      ureg_MOV(shader, o_vtex[i], vtex[i]);
-   ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
-
-   ureg_END(shader);
-
-   r->p_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
-   if (!r->p_vs[0])
-      return false;
-
-   return true;
-}
-
-static bool
-create_field_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
-{
-   struct ureg_program *shader;
-   struct ureg_src vpos, vtex[5];
-   struct ureg_dst o_vpos, o_vtex[5];
-   unsigned i;
-
-   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
-   if (!shader)
-      return false;
-
-   vpos = ureg_DECL_vs_input(shader, 0);
-   for (i = 0; i < 5; ++i)
-      vtex[i] = ureg_DECL_vs_input(shader, i + 1);
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
-   for (i = 0; i < 5; ++i)
-      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
-
-   /*
-    * o_vpos = vpos
-    * o_vtex[0..2] = vtex[0..2]
-    * o_vtex[3] = vpos + vtex[3] // Apply motion vector
-    * o_vtex[4] = vpos + vtex[4] // Apply motion vector
-    */
-   ureg_MOV(shader, o_vpos, vpos);
-   for (i = 0; i < 3; ++i)
-      ureg_MOV(shader, o_vtex[i], vtex[i]);
-   ureg_ADD(shader, o_vtex[3], vpos, vtex[3]);
-   ureg_ADD(shader, o_vtex[4], vpos, vtex[4]);
-
-   ureg_END(shader);
-
-   r->p_vs[1] = ureg_create_shader_and_destroy(shader, r->pipe);
-   if (!r->p_vs[1])
-      return false;
-
-   return true;
+   return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static bool
+static void *
 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
@@ -250,7 +182,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
-      return false;
+      return NULL;
 
    for (i = 0; i < 4; ++i)  {
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
@@ -279,14 +211,10 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_release_temporary(shader, ref);
    ureg_END(shader);
 
-   r->p_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
-   if (!r->p_fs[0])
-      return false;
-
-   return true;
+   return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static bool
+static void *
 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
@@ -298,7 +226,7 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
-      return false;
+      return NULL;
 
    for (i = 0; i < 5; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
@@ -331,95 +259,10 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_release_temporary(shader, ref);
    ureg_END(shader);
 
-   r->p_fs[1] = ureg_create_shader_and_destroy(shader, r->pipe);
-   if (!r->p_fs[1])
-      return false;
-
-   return true;
-}
-
-static bool
-create_frame_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
-{
-   struct ureg_program *shader;
-   struct ureg_src vpos, vtex[5];
-   struct ureg_dst o_vpos, o_vtex[5];
-   unsigned i;
-
-   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
-   if (!shader)
-      return false;
-
-   vpos = ureg_DECL_vs_input(shader, 0);
-   for (i = 0; i < 4; ++i)
-      vtex[i] = ureg_DECL_vs_input(shader, i + 1);
-   /* Skip input 5 */
-   ureg_DECL_vs_input(shader, 5);
-   vtex[4] = ureg_DECL_vs_input(shader, 6);
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
-   for (i = 0; i < 5; ++i)
-      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
-
-   /*
-    * o_vpos = vpos
-    * o_vtex[0..2] = vtex[0..2]
-    * o_vtex[3..4] = vpos + vtex[3..4] // Apply motion vector
-    */
-   ureg_MOV(shader, o_vpos, vpos);
-   for (i = 0; i < 3; ++i)
-      ureg_MOV(shader, o_vtex[i], vtex[i]);
-   for (i = 3; i < 5; ++i)
-      ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
-
-   ureg_END(shader);
-
-   r->b_vs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
-   if (!r->b_vs[0])
-      return false;
-
-   return true;
-}
-
-static bool
-create_field_bi_pred_vert_shader(struct vl_mpeg12_mc_renderer *r)
-{
-   struct ureg_program *shader;
-   struct ureg_src vpos, vtex[7];
-   struct ureg_dst o_vpos, o_vtex[7];
-   unsigned i;
-
-   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
-   if (!shader)
-      return false;
-
-   vpos = ureg_DECL_vs_input(shader, 0);
-   for (i = 0; i < 7; ++i)
-      vtex[i] = ureg_DECL_vs_input(shader, i + 1);
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
-   for (i = 0; i < 7; ++i)
-      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, i + 1);
-
-   /*
-    * o_vpos = vpos
-    * o_vtex[0..2] = vtex[0..2]
-    * o_vtex[3..6] = vpos + vtex[3..6] // Apply motion vector
-    */
-   ureg_MOV(shader, o_vpos, vpos);
-   for (i = 0; i < 3; ++i)
-      ureg_MOV(shader, o_vtex[i], vtex[i]);
-   for (i = 3; i < 7; ++i)
-      ureg_ADD(shader, o_vtex[i], vpos, vtex[i]);
-
-   ureg_END(shader);
-
-   r->b_vs[1] = ureg_create_shader_and_destroy(shader, r->pipe);
-   if (!r->b_vs[1])
-      return false;
-
-   return true;
+   return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static bool
+static void *
 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
@@ -431,7 +274,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
-      return false;
+      return NULL;
 
    for (i = 0; i < 5; ++i)  {
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
@@ -466,14 +309,10 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_release_temporary(shader, ref[1]);
    ureg_END(shader);
 
-   r->b_fs[0] = ureg_create_shader_and_destroy(shader, r->pipe);
-   if (!r->b_fs[0])
-      return false;
-
-   return true;
+   return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static bool
+static void *
 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
@@ -485,7 +324,7 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
-      return false;
+      return NULL;
 
    for (i = 0; i < 5; ++i)  {
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
@@ -520,11 +359,7 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_release_temporary(shader, ref[1]);
    ureg_END(shader);
 
-   r->b_fs[1] = ureg_create_shader_and_destroy(shader, r->pipe);
-   if (!r->b_fs[1])
-      return false;
-
-   return true;
+   return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
 static void
@@ -650,16 +485,18 @@ init_shaders(struct vl_mpeg12_mc_renderer *r)
 {
    assert(r);
 
-   create_intra_vert_shader(r);
-   create_intra_frag_shader(r);
-   create_frame_pred_vert_shader(r);
-   create_field_pred_vert_shader(r);
-   create_frame_pred_frag_shader(r);
-   create_field_pred_frag_shader(r);
-   create_frame_bi_pred_vert_shader(r);
-   create_field_bi_pred_vert_shader(r);
-   create_frame_bi_pred_frag_shader(r);
-   create_field_bi_pred_frag_shader(r);
+   assert(r->i_vs = create_vert_shader(r, 0, 0));
+   assert(r->i_fs = create_intra_frag_shader(r));
+   
+   assert(r->p_vs[0] = create_vert_shader(r, 1, 1));
+   assert(r->p_vs[1] = create_vert_shader(r, 1, 2));
+   assert(r->p_fs[0] = create_frame_pred_frag_shader(r));
+   assert(r->p_fs[1] = create_field_pred_frag_shader(r));
+
+   assert(r->b_vs[0] = create_vert_shader(r, 2, 1));
+   assert(r->b_vs[1] = create_vert_shader(r, 2, 2));
+   assert(r->b_fs[0] = create_frame_bi_pred_frag_shader(r));
+   assert(r->b_fs[1] = create_field_bi_pred_frag_shader(r));
 
    return true;
 }
-- 
cgit v1.2.3


From 325233fdef179caee04b92c2042fdbbf1f96b9fd Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 4 Nov 2010 23:23:42 +0100
Subject: [g3dvl] move vertex normalisation into vertex shader

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 77 ++++++++++++++----------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  1 -
 2 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index fb9ee7c1a1d..fcdeaa57112 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -48,7 +48,7 @@
 
 struct vertex_shader_consts
 {
-   struct vertex4f denorm;
+   struct vertex4f norm;
 };
 
 struct fragment_shader_consts
@@ -82,7 +82,9 @@ static void *
 create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
 {
    struct ureg_program *shader;
+   struct ureg_src norm;
    struct ureg_src vpos, vtex[3], vmv[4];
+   struct ureg_dst temp;
    struct ureg_dst o_vpos, o_vtex[3], o_vmv[4];
    unsigned i, j, count;
 
@@ -90,6 +92,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    if (!shader)
       return NULL;
 
+   norm = ureg_DECL_constant(shader, 0);
+   temp = ureg_DECL_temporary(shader);
+
    vpos = ureg_DECL_vs_input(shader, 0);
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
 
@@ -113,16 +118,24 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    }
 
    /*
-    * o_vpos = vpos
-    * o_vtex[0..2] = vtex[0..2]
-    * o_vmv[0..count] = vpos + vmv[0..4] // Apply motion vector
+    * o_vpos = vpos * norm
+    * o_vtex[0..2] = vtex[0..2] * norm
+    * o_vmv[0..count] = o_vpos + vmv[0..4] * 0.5 // Apply motion vector
     */
-   ureg_MOV(shader, o_vpos, vpos);
-   for (i = 0; i < 3; ++i)
-      ureg_MOV(shader, o_vtex[i], vtex[i]);
-   for (i = 0; i < count; ++i)
-      ureg_ADD(shader, o_vmv[i], vpos, vmv[i]);
+   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, norm);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+   for (i = 0; i < 3; ++i) {
+      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), vtex[i], norm);
+      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_ZW), vtex[i]);
+   }
+   for (i = 0; i < count; ++i) {
+      ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vmv[i], 
+         ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
+      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(temp), norm, ureg_src(o_vpos));
+      ureg_MOV(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), vmv[i]);
+   }
 
+   ureg_release_temporary(shader, temp);
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, r->pipe);
@@ -842,8 +855,8 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
 
          vb = ref_vb[1] + pos * 2 * 24;
 
-         mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
-         mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
+         mo_vec[0].x = mb->pmv[0][1][0];
+         mo_vec[0].y = mb->pmv[0][1][1];
 
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
             for (i = 0; i < 24 * 2; i += 2) {
@@ -852,8 +865,8 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
             }
          }
          else {
-            mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
-            mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
+            mo_vec[1].x = mb->pmv[1][1][0];
+            mo_vec[1].y = mb->pmv[1][1][1];
 
             for (i = 0; i < 24 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
@@ -875,21 +888,21 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
          vb = ref_vb[0] + pos * 2 * 24;
 
          if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
-             mo_vec[0].x = mb->pmv[0][1][0] * 0.5f * r->surface_tex_inv_size.x;
-             mo_vec[0].y = mb->pmv[0][1][1] * 0.5f * r->surface_tex_inv_size.y;
+             mo_vec[0].x = mb->pmv[0][1][0];
+             mo_vec[0].y = mb->pmv[0][1][1];
 
              if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
-                mo_vec[1].x = mb->pmv[1][1][0] * 0.5f * r->surface_tex_inv_size.x;
-                mo_vec[1].y = mb->pmv[1][1][1] * 0.5f * r->surface_tex_inv_size.y;
+                mo_vec[1].x = mb->pmv[1][1][0];
+                mo_vec[1].y = mb->pmv[1][1][1];
              }
          }
          else {
-            mo_vec[0].x = mb->pmv[0][0][0] * 0.5f * r->surface_tex_inv_size.x;
-            mo_vec[0].y = mb->pmv[0][0][1] * 0.5f * r->surface_tex_inv_size.y;
+            mo_vec[0].x = mb->pmv[0][0][0];
+            mo_vec[0].y = mb->pmv[0][0][1];
 
             if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
-               mo_vec[1].x = mb->pmv[1][0][0] * 0.5f * r->surface_tex_inv_size.x;
-               mo_vec[1].y = mb->pmv[1][0][1] * 0.5f * r->surface_tex_inv_size.y;
+               mo_vec[1].x = mb->pmv[1][0][0];
+               mo_vec[1].y = mb->pmv[1][0][1];
             }
          }
 
@@ -914,13 +927,13 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
       {
          const struct vertex2f unit =
          {
-            r->surface_tex_inv_size.x * MACROBLOCK_WIDTH,
-            r->surface_tex_inv_size.y * MACROBLOCK_HEIGHT
+            MACROBLOCK_WIDTH,
+            MACROBLOCK_HEIGHT
          };
          const struct vertex2f half =
          {
-            r->surface_tex_inv_size.x * (MACROBLOCK_WIDTH / 2),
-            r->surface_tex_inv_size.y * (MACROBLOCK_HEIGHT / 2)
+            (MACROBLOCK_WIDTH / 2),
+            (MACROBLOCK_HEIGHT / 2)
          };
          const struct vertex2f offsets[2][2] =
          {
@@ -1065,8 +1078,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       &buf_transfer
    );
 
-   vs_consts->denorm.x = r->surface->width;
-   vs_consts->denorm.y = r->surface->height;
+   vs_consts->norm.x = 1.0f / r->surface->width;
+   vs_consts->norm.y = 1.0f / r->surface->height;
 
    pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
 
@@ -1275,8 +1288,8 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
 
                   fill_frame_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
                   if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
-                     r->zero_block[0].x = (mbpx + x * 8) * r->surface_tex_inv_size.x;
-                     r->zero_block[0].y = (mbpy + y * 8) * r->surface_tex_inv_size.y;
+                     r->zero_block[0].x = (mbpx + x * 8);
+                     r->zero_block[0].y = (mbpy + y * 8);
                   }
                }
             }
@@ -1307,8 +1320,8 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
              ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
             fill_frame_zero_block(texels, tex_pitch);
             if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
-               r->zero_block[tb + 1].x = (mbpx << 1) * r->surface_tex_inv_size.x;
-               r->zero_block[tb + 1].y = (mbpy << 1) * r->surface_tex_inv_size.y;
+               r->zero_block[tb + 1].x = (mbpx << 1);
+               r->zero_block[tb + 1].y = (mbpy << 1);
             }
          }
       }
@@ -1469,8 +1482,6 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       pipe_surface_reference(&renderer->past, past);
       pipe_surface_reference(&renderer->future, future);
       renderer->fence = fence;
-      renderer->surface_tex_inv_size.x = 1.0f / surface->width;
-      renderer->surface_tex_inv_size.y = 1.0f / surface->height;
    }
 
    while (num_macroblocks) {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 85191cf6b02..3a56529aa6b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -104,7 +104,6 @@ struct vl_mpeg12_mc_renderer
    struct pipe_mpeg12_macroblock *macroblock_buf;
    struct pipe_transfer *tex_transfer[3];
    short *texels[3];
-   struct vertex2f surface_tex_inv_size;
    struct vertex2f zero_block[3];
 
    struct keymap *texview_map;
-- 
cgit v1.2.3


From de2eec6a3d0a22f342355c09515da8c6beca662b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 5 Nov 2010 00:20:33 +0100
Subject: [g3dvl] move scaling to macroblocksize into vertex shader

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 120 +++++++++++------------
 1 file changed, 57 insertions(+), 63 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index fcdeaa57112..0f40b04bb4c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -82,7 +82,7 @@ static void *
 create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
 {
    struct ureg_program *shader;
-   struct ureg_src norm;
+   struct ureg_src norm, mbs;
    struct ureg_src vpos, vtex[3], vmv[4];
    struct ureg_dst temp;
    struct ureg_dst o_vpos, o_vtex[3], o_vmv[4];
@@ -93,6 +93,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
       return NULL;
 
    norm = ureg_DECL_constant(shader, 0);
+   mbs = ureg_imm2f(shader, MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT);
    temp = ureg_DECL_temporary(shader);
 
    vpos = ureg_DECL_vs_input(shader, 0);
@@ -118,14 +119,16 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    }
 
    /*
-    * o_vpos = vpos * norm
-    * o_vtex[0..2] = vtex[0..2] * norm
+    * o_vpos = vpos * norm * mbs
+    * o_vtex[0..2] = vtex[0..2] * norm * mbs
     * o_vmv[0..count] = o_vpos + vmv[0..4] * 0.5 // Apply motion vector
     */
-   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, norm);
+   ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vpos, mbs);
+   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(temp), norm);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
    for (i = 0; i < 3; ++i) {
-      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), vtex[i], norm);
+      ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vtex[i], mbs);
+      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(temp), norm);
       ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_ZW), vtex[i]);
    }
    for (i = 0; i < count; ++i) {
@@ -727,7 +730,7 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
 
 static void
 gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
-                const struct vertex2f *unit, const struct vertex2f *half, const struct vertex2f *offset,
+                const struct vertex2f *offset,
                 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
                 bool use_zeroblocks, struct vertex2f *zero_blocks)
 {
@@ -735,36 +738,41 @@ gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
    unsigned mbx = mb->mbx;
    unsigned mby = mb->mby;
 
+   const struct vertex2f half =
+   {
+      0.5f,
+      0.5f
+   };
    struct vertex2f v;
 
    assert(vb);
-   assert(unit && half && offset);
+   assert(offset);
    assert(zero_blocks || !use_zeroblocks);
 
    /* Generate vertices for two triangles covering a block */
-   v.x = mbx * unit->x + offset->x;
-   v.y = mby * unit->y + offset->y;
+   v.x = mbx + offset->x;
+   v.y = mby + offset->y;
 
    vb[0].pos.x = v.x;
    vb[0].pos.y = v.y;
    vb[1].pos.x = v.x;
-   vb[1].pos.y = v.y + half->y;
-   vb[2].pos.x = v.x + half->x;
+   vb[1].pos.y = v.y + half.y;
+   vb[2].pos.x = v.x + half.x;
    vb[2].pos.y = v.y;
-   vb[3].pos.x = v.x + half->x;
+   vb[3].pos.x = v.x + half.x;
    vb[3].pos.y = v.y;
    vb[4].pos.x = v.x;
-   vb[4].pos.y = v.y + half->y;
-   vb[5].pos.x = v.x + half->x;
-   vb[5].pos.y = v.y + half->y;
+   vb[4].pos.y = v.y + half.y;
+   vb[5].pos.x = v.x + half.x;
+   vb[5].pos.y = v.y + half.y;
 
    /* Generate texcoords for the triangles, either pointing to the correct area on the luma/chroma texture
       or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
       for this channel is defined for this block) */
 
    if (!use_zeroblocks || cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
-      v.x = mbx * unit->x + offset->x;
-      v.y = mby * unit->y + offset->y;
+      v.x = mbx + offset->x;
+      v.y = mby + offset->y;
    }
    else {
       v.x = zero_blocks[0].x;
@@ -774,19 +782,19 @@ gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
    vb[0].luma_tc.x = v.x;
    vb[0].luma_tc.y = v.y;
    vb[1].luma_tc.x = v.x;
-   vb[1].luma_tc.y = v.y + half->y;
-   vb[2].luma_tc.x = v.x + half->x;
+   vb[1].luma_tc.y = v.y + half.y;
+   vb[2].luma_tc.x = v.x + half.x;
    vb[2].luma_tc.y = v.y;
-   vb[3].luma_tc.x = v.x + half->x;
+   vb[3].luma_tc.x = v.x + half.x;
    vb[3].luma_tc.y = v.y;
    vb[4].luma_tc.x = v.x;
-   vb[4].luma_tc.y = v.y + half->y;
-   vb[5].luma_tc.x = v.x + half->x;
-   vb[5].luma_tc.y = v.y + half->y;
+   vb[4].luma_tc.y = v.y + half.y;
+   vb[5].luma_tc.x = v.x + half.x;
+   vb[5].luma_tc.y = v.y + half.y;
 
    if (!use_zeroblocks || cbp & cb_mask) {
-      v.x = mbx * unit->x + offset->x;
-      v.y = mby * unit->y + offset->y;
+      v.x = mbx + offset->x;
+      v.y = mby + offset->y;
    }
    else {
       v.x = zero_blocks[1].x;
@@ -796,19 +804,19 @@ gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
    vb[0].cb_tc.x = v.x;
    vb[0].cb_tc.y = v.y;
    vb[1].cb_tc.x = v.x;
-   vb[1].cb_tc.y = v.y + half->y;
-   vb[2].cb_tc.x = v.x + half->x;
+   vb[1].cb_tc.y = v.y + half.y;
+   vb[2].cb_tc.x = v.x + half.x;
    vb[2].cb_tc.y = v.y;
-   vb[3].cb_tc.x = v.x + half->x;
+   vb[3].cb_tc.x = v.x + half.x;
    vb[3].cb_tc.y = v.y;
    vb[4].cb_tc.x = v.x;
-   vb[4].cb_tc.y = v.y + half->y;
-   vb[5].cb_tc.x = v.x + half->x;
-   vb[5].cb_tc.y = v.y + half->y;
+   vb[4].cb_tc.y = v.y + half.y;
+   vb[5].cb_tc.x = v.x + half.x;
+   vb[5].cb_tc.y = v.y + half.y;
 
    if (!use_zeroblocks || cbp & cr_mask) {
-      v.x = mbx * unit->x + offset->x;
-      v.y = mby * unit->y + offset->y;
+      v.x = mbx + offset->x;
+      v.y = mby + offset->y;
    }
    else {
       v.x = zero_blocks[2].x;
@@ -818,15 +826,15 @@ gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
    vb[0].cr_tc.x = v.x;
    vb[0].cr_tc.y = v.y;
    vb[1].cr_tc.x = v.x;
-   vb[1].cr_tc.y = v.y + half->y;
-   vb[2].cr_tc.x = v.x + half->x;
+   vb[1].cr_tc.y = v.y + half.y;
+   vb[2].cr_tc.x = v.x + half.x;
    vb[2].cr_tc.y = v.y;
-   vb[3].cr_tc.x = v.x + half->x;
+   vb[3].cr_tc.x = v.x + half.x;
    vb[3].cr_tc.y = v.y;
    vb[4].cr_tc.x = v.x;
-   vb[4].cr_tc.y = v.y + half->y;
-   vb[5].cr_tc.x = v.x + half->x;
-   vb[5].cr_tc.y = v.y + half->y;
+   vb[4].cr_tc.y = v.y + half.y;
+   vb[5].cr_tc.x = v.x + half.x;
+   vb[5].cr_tc.y = v.y + half.y;
 }
 
 static void
@@ -925,43 +933,29 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
       }
       case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
       {
-         const struct vertex2f unit =
-         {
-            MACROBLOCK_WIDTH,
-            MACROBLOCK_HEIGHT
-         };
-         const struct vertex2f half =
-         {
-            (MACROBLOCK_WIDTH / 2),
-            (MACROBLOCK_HEIGHT / 2)
-         };
          const struct vertex2f offsets[2][2] =
          {
             {
-               {0, 0}, {0, half.y}
+               {0, 0}, {0, 0.5f}
             },
             {
-               {half.x, 0}, {half.x, half.y}
+               {0.5f, 0}, {0.5f, 0.5f}
             }
          };
          const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
 
          struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
 
-         gen_block_verts(vb, mb,
-                         &unit, &half, &offsets[0][0],
+         gen_block_verts(vb, mb, &offsets[0][0],
                          32, 2, 1, use_zb, r->zero_block);
 
-         gen_block_verts(vb + 6, mb,
-                         &unit, &half, &offsets[1][0],
+         gen_block_verts(vb + 6, mb, &offsets[1][0],
                          16, 2, 1, use_zb, r->zero_block);
 
-         gen_block_verts(vb + 12, mb,
-                         &unit, &half, &offsets[0][1],
+         gen_block_verts(vb + 12, mb, &offsets[0][1],
                          8, 2, 1, use_zb, r->zero_block);
 
-         gen_block_verts(vb + 18, mb,
-                         &unit, &half, &offsets[1][1],
+         gen_block_verts(vb + 18, mb, &offsets[1][1],
                          4, 2, 1, use_zb, r->zero_block);
 
          break;
@@ -1288,8 +1282,8 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
 
                   fill_frame_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
                   if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
-                     r->zero_block[0].x = (mbpx + x * 8);
-                     r->zero_block[0].y = (mbpy + y * 8);
+                     r->zero_block[0].x = (mbx + x * 0.5f);
+                     r->zero_block[0].y = (mby + y * 0.5f);
                   }
                }
             }
@@ -1320,8 +1314,8 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
              ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
             fill_frame_zero_block(texels, tex_pitch);
             if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
-               r->zero_block[tb + 1].x = (mbpx << 1);
-               r->zero_block[tb + 1].y = (mbpy << 1);
+               r->zero_block[tb + 1].x = mbx;
+               r->zero_block[tb + 1].y = mby;
             }
          }
       }
-- 
cgit v1.2.3


From dd51858850f7ff577306784378975762803f29cb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 5 Nov 2010 02:16:43 +0100
Subject: [g3dvl] it finally starts to look like a badly deinterlaced video

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 72 ++++++++++++++++++------
 1 file changed, 55 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 0f40b04bb4c..4aa4cb2296a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -85,7 +85,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    struct ureg_src norm, mbs;
    struct ureg_src vpos, vtex[3], vmv[4];
    struct ureg_dst temp;
-   struct ureg_dst o_vpos, o_vtex[3], o_vmv[4];
+   struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_frc;
    unsigned i, j, count;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -118,10 +118,13 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
       }
    }
 
+   o_frc = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
+
    /*
     * o_vpos = vpos * norm * mbs
     * o_vtex[0..2] = vtex[0..2] * norm * mbs
     * o_vmv[0..count] = o_vpos + vmv[0..4] * 0.5 // Apply motion vector
+    * o_frc = fract(vpos) * mbs
     */
    ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vpos, mbs);
    ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(temp), norm);
@@ -137,6 +140,13 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
       ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(temp), norm, ureg_src(o_vpos));
       ureg_MOV(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), vmv[i]);
    }
+   if (mv_per_frame == 2) {
+      ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vpos, ureg_imm1f(shader, 0.5f));
+      ureg_FRC(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), ureg_src(temp));
+      ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), ureg_src(temp), ureg_imm1f(shader, 2.0f));
+      ureg_MUL(shader, ureg_writemask(o_frc, TGSI_WRITEMASK_XY), ureg_src(temp), mbs);
+      ureg_MOV(shader, ureg_writemask(o_frc, TGSI_WRITEMASK_ZW), vpos);
+   }
 
    ureg_release_temporary(shader, temp);
    ureg_END(shader);
@@ -234,11 +244,11 @@ static void *
 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[5];
+   struct ureg_src tc[5], fract;
    struct ureg_src sampler[4];
    struct ureg_dst texel, ref, tmp;
    struct ureg_dst fragment;
-   unsigned i;
+   unsigned i, label;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -246,6 +256,7 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    for (i = 0; i < 5; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+   fract = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 6, TGSI_INTERPOLATE_LINEAR);
    for (i = 0; i < 4; ++i)
       sampler[i] = ureg_DECL_sampler(shader, i);
 
@@ -258,16 +269,27 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * texel.r = tex(tc[0], sampler[0])
     * texel.g = tex(tc[1], sampler[1])
     * texel.b = tex(tc[2], sampler[2])
-    * ref = tex(tc[3], sampler[3])
+    * if(fract & 1)
+    *    ref = tex(tc[3], sampler[3])
+    * else
+    *    ref = tex(tc[4], sampler[3])
     * fragment = texel * scale + ref
     */
    for (i = 0; i < 3; ++i) {
       /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
-      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
+      ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
+      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
    }
 
-   ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[4], sampler[3]);
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), fract, ureg_imm1f(shader, 0.5f));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp));
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
+      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
+   ureg_ELSE(shader, &label);
+      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[4], sampler[3]);
+   ureg_ENDIF(shader);
+
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
 
    ureg_release_temporary(shader, tmp);
@@ -284,7 +306,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_program *shader;
    struct ureg_src tc[5];
    struct ureg_src sampler[5];
-   struct ureg_dst texel, ref[2];
+   struct ureg_dst texel, ref[2], tmp;
    struct ureg_dst fragment;
    unsigned i;
 
@@ -299,6 +321,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    texel = ureg_DECL_temporary(shader);
    ref[0] = ureg_DECL_temporary(shader);
    ref[1] = ureg_DECL_temporary(shader);
+   tmp = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
@@ -311,8 +334,8 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     */
    for (i = 0; i < 3; ++i) {
       /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
-      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
+      ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
+      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
    }
    ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
    ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
@@ -320,6 +343,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
 
+   ureg_release_temporary(shader, tmp);
    ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, ref[0]);
    ureg_release_temporary(shader, ref[1]);
@@ -332,23 +356,26 @@ static void *
 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[5];
+   struct ureg_src tc[7], fract;
    struct ureg_src sampler[5];
-   struct ureg_dst texel, ref[2];
+   struct ureg_dst texel, ref[2], tmp;
    struct ureg_dst fragment;
-   unsigned i;
+   unsigned i, label;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return NULL;
 
-   for (i = 0; i < 5; ++i)  {
+   for (i = 0; i < 7; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+   fract = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 8, TGSI_INTERPOLATE_LINEAR);
+   for (i = 0; i < 5; ++i)
       sampler[i] = ureg_DECL_sampler(shader, i);
-   }
+
    texel = ureg_DECL_temporary(shader);
    ref[0] = ureg_DECL_temporary(shader);
    ref[1] = ureg_DECL_temporary(shader);
+   tmp = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
@@ -364,12 +391,23 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
    }
-   ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
-   ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
+
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), fract, ureg_imm1f(shader, 0.5f));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp));
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
+     ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
+     ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
+   ureg_ELSE(shader, &label);
+     ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[5], sampler[3]);
+     ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[6], sampler[4]);
+   ureg_ENDIF(shader);
+
    ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
 
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
 
+   ureg_release_temporary(shader, tmp);
    ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, ref[0]);
    ureg_release_temporary(shader, ref[1]);
-- 
cgit v1.2.3


From d7fc97b6ab1bbc191eac153e0203a2c204f4b7e6 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 5 Nov 2010 22:48:27 +0100
Subject: [g3dvl] simplify shaders and fix bugs

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 39 ++++++++++--------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 4aa4cb2296a..14ce5f352d0 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -85,7 +85,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    struct ureg_src norm, mbs;
    struct ureg_src vpos, vtex[3], vmv[4];
    struct ureg_dst temp;
-   struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_frc;
+   struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_line;
    unsigned i, j, count;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -118,7 +118,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
       }
    }
 
-   o_frc = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
+   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
 
    /*
     * o_vpos = vpos * norm * mbs
@@ -132,20 +132,15 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    for (i = 0; i < 3; ++i) {
       ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vtex[i], mbs);
       ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(temp), norm);
-      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_ZW), vtex[i]);
    }
    for (i = 0; i < count; ++i) {
       ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vmv[i], 
          ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
       ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(temp), norm, ureg_src(o_vpos));
-      ureg_MOV(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), vmv[i]);
    }
    if (mv_per_frame == 2) {
-      ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vpos, ureg_imm1f(shader, 0.5f));
-      ureg_FRC(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), ureg_src(temp));
-      ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), ureg_src(temp), ureg_imm1f(shader, 2.0f));
-      ureg_MUL(shader, ureg_writemask(o_frc, TGSI_WRITEMASK_XY), ureg_src(temp), mbs);
-      ureg_MOV(shader, ureg_writemask(o_frc, TGSI_WRITEMASK_ZW), vpos);
+      ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
+      ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vpos, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
    }
 
    ureg_release_temporary(shader, temp);
@@ -244,7 +239,7 @@ static void *
 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[5], fract;
+   struct ureg_src tc[5], line;
    struct ureg_src sampler[4];
    struct ureg_dst texel, ref, tmp;
    struct ureg_dst fragment;
@@ -256,7 +251,7 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    for (i = 0; i < 5; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
-   fract = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 6, TGSI_INTERPOLATE_LINEAR);
+   line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 6, TGSI_INTERPOLATE_LINEAR);
    for (i = 0; i < 4; ++i)
       sampler[i] = ureg_DECL_sampler(shader, i);
 
@@ -281,13 +276,12 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
    }
 
-   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), fract, ureg_imm1f(shader, 0.5f));
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
    ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
-      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
-   ureg_ELSE(shader, &label);
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[4], sampler[3]);
+   ureg_ELSE(shader, &label);
+      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
    ureg_ENDIF(shader);
 
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
@@ -356,7 +350,7 @@ static void *
 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[7], fract;
+   struct ureg_src tc[7], line;
    struct ureg_src sampler[5];
    struct ureg_dst texel, ref[2], tmp;
    struct ureg_dst fragment;
@@ -368,7 +362,7 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    for (i = 0; i < 7; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
-   fract = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 8, TGSI_INTERPOLATE_LINEAR);
+   line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 8, TGSI_INTERPOLATE_LINEAR);
    for (i = 0; i < 5; ++i)
       sampler[i] = ureg_DECL_sampler(shader, i);
 
@@ -392,15 +386,14 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
    }
 
-   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), fract, ureg_imm1f(shader, 0.5f));
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
    ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
-     ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
-     ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
+      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[4], sampler[3]);
+      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[6], sampler[4]);
    ureg_ELSE(shader, &label);
-     ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[5], sampler[3]);
-     ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[6], sampler[4]);
+      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
+      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[5], sampler[4]);
    ureg_ENDIF(shader);
 
    ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
-- 
cgit v1.2.3


From 34e5ae5aed8187e0f6395dee2985091cea3a6df6 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 6 Nov 2010 16:18:24 +0100
Subject: [g3dvl] motion type depends on picture structure not dct type

---
 src/gallium/state_trackers/xorg/xvmc/surface.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 0decc45a0bb..9709f2b2373 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -72,14 +72,14 @@ static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
    return -1;
 }
 
-static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, int xvmc_dct_type)
+static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, unsigned int xvmc_picture_structure)
 {
    switch (xvmc_motion_type) {
       case XVMC_PREDICTION_FRAME:
-         if (xvmc_dct_type == XVMC_DCT_TYPE_FIELD)
-            return PIPE_MPEG12_MOTION_TYPE_16x8;
-         else if (xvmc_dct_type == XVMC_DCT_TYPE_FRAME)
+         if (xvmc_picture_structure == XVMC_FRAME_PICTURE)
             return PIPE_MPEG12_MOTION_TYPE_FRAME;
+         else
+            return PIPE_MPEG12_MOTION_TYPE_16x8;
          break;
       case XVMC_PREDICTION_FIELD:
          return PIPE_MPEG12_MOTION_TYPE_FIELD;
@@ -89,7 +89,7 @@ static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, int xvmc_
          assert(0);
    }
 
-   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized motion type 0x%08X (with DCT type 0x%08X).\n", xvmc_motion_type, xvmc_dct_type);
+   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized motion type 0x%08X (with picture structure 0x%08X).\n", xvmc_motion_type, xvmc_picture_structure);
 
    return -1;
 }
@@ -146,6 +146,7 @@ CreateOrResizeBackBuffer(struct vl_context *vctx, unsigned int width, unsigned i
 
 static void
 MacroBlocksToPipe(struct pipe_screen *screen,
+                  unsigned int xvmc_picture_structure,
                   const XvMCMacroBlockArray *xvmc_macroblocks,
                   const XvMCBlockArray *xvmc_blocks,
                   unsigned int first_macroblock,
@@ -168,7 +169,7 @@ MacroBlocksToPipe(struct pipe_screen *screen,
       pipe_macroblocks->mby = xvmc_mb->y;
       pipe_macroblocks->mb_type = TypeToPipe(xvmc_mb->macroblock_type);
       if (pipe_macroblocks->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA)
-         pipe_macroblocks->mo_type = MotionToPipe(xvmc_mb->motion_type, xvmc_mb->dct_type);
+         pipe_macroblocks->mo_type = MotionToPipe(xvmc_mb->motion_type, xvmc_picture_structure);
       /* Get rid of Valgrind 'undefined' warnings */
       else
          pipe_macroblocks->mo_type = -1;
@@ -327,7 +328,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    p_vsfc = past_surface ? past_surface_priv->pipe_vsfc : NULL;
    f_vsfc = future_surface ? future_surface_priv->pipe_vsfc : NULL;
 
-   MacroBlocksToPipe(vpipe->screen, macroblocks, blocks, first_macroblock,
+   MacroBlocksToPipe(vpipe->screen, picture_structure, macroblocks, blocks, first_macroblock,
                      num_macroblocks, pipe_macroblocks);
 
    vpipe->set_decode_target(vpipe, t_vsfc);
-- 
cgit v1.2.3


From 725a5e15cfc7ffc1c5d940b01b9b010ed726a4eb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 6 Nov 2010 17:08:55 +0100
Subject: [g3dvl] cleanup naming convention and comments

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 47 ++++++++++++++----------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 14ce5f352d0..e24186ea6db 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -84,7 +84,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    struct ureg_program *shader;
    struct ureg_src norm, mbs;
    struct ureg_src vpos, vtex[3], vmv[4];
-   struct ureg_dst temp;
+   struct ureg_dst tmp;
    struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_line;
    unsigned i, j, count;
 
@@ -94,7 +94,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    norm = ureg_DECL_constant(shader, 0);
    mbs = ureg_imm2f(shader, MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT);
-   temp = ureg_DECL_temporary(shader);
+   tmp = ureg_DECL_temporary(shader);
 
    vpos = ureg_DECL_vs_input(shader, 0);
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
@@ -123,27 +123,31 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    /*
     * o_vpos = vpos * norm * mbs
     * o_vtex[0..2] = vtex[0..2] * norm * mbs
-    * o_vmv[0..count] = o_vpos + vmv[0..4] * 0.5 // Apply motion vector
-    * o_frc = fract(vpos) * mbs
+    * o_vmv[0..count] = o_vpos + vmv[0..4] * norm * 0.5 // Apply motion vector
+    * o_line.x = 1
+    * o_line.y = vpos.y * 8
     */
-   ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vpos, mbs);
-   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(temp), norm);
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, mbs);
+   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), norm);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+
    for (i = 0; i < 3; ++i) {
-      ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vtex[i], mbs);
-      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(temp), norm);
+      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vtex[i], mbs);
+      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(tmp), norm);
    }
+
    for (i = 0; i < count; ++i) {
-      ureg_MUL(shader, ureg_writemask(temp, TGSI_WRITEMASK_XY), vmv[i], 
+      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vmv[i], 
          ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
-      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(temp), norm, ureg_src(o_vpos));
+      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(tmp), norm, ureg_src(o_vpos));
    }
+
    if (mv_per_frame == 2) {
       ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
       ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vpos, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
    }
 
-   ureg_release_temporary(shader, temp);
+   ureg_release_temporary(shader, tmp);
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, r->pipe);
@@ -155,7 +159,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_program *shader;
    struct ureg_src tc[3];
    struct ureg_src sampler[3];
-   struct ureg_dst texel, temp;
+   struct ureg_dst texel, tmp;
    struct ureg_dst fragment;
    unsigned i;
 
@@ -168,7 +172,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
       sampler[i] = ureg_DECL_sampler(shader, i);
    }
    texel = ureg_DECL_temporary(shader);
-   temp = ureg_DECL_temporary(shader);
+   tmp = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
@@ -179,13 +183,13 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
     */
    for (i = 0; i < 3; ++i) {
       /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, temp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
-      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(temp), TGSI_SWIZZLE_X));
+      ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
+      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
    }
    ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
 
    ureg_release_temporary(shader, texel);
-   ureg_release_temporary(shader, temp);
+   ureg_release_temporary(shader, tmp);
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, r->pipe);
@@ -264,10 +268,10 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * texel.r = tex(tc[0], sampler[0])
     * texel.g = tex(tc[1], sampler[1])
     * texel.b = tex(tc[2], sampler[2])
-    * if(fract & 1)
-    *    ref = tex(tc[3], sampler[3])
-    * else
+    * if(line % 2)
     *    ref = tex(tc[4], sampler[3])
+    * else
+    *    ref = tex(tc[3], sampler[3])
     * fragment = texel * scale + ref
     */
    for (i = 0; i < 3; ++i) {
@@ -376,7 +380,10 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * texel.r = tex(tc[0], sampler[0])
     * texel.g = tex(tc[1], sampler[1])
     * texel.b = tex(tc[2], sampler[2])
-    * ref[0..1 = tex(tc[3..4], sampler[3..4])
+    * if(line % 2)
+    *    ref[0..1] = tex(tc[4|6], sampler[3..4])
+    * else
+    *    ref[0..1] = tex(tc[3|5], sampler[3..4])
     * ref[0] = lerp(ref[0], ref[1], 0.5)
     * fragment = texel * scale + ref[0]
     */
-- 
cgit v1.2.3


From 5182416f030af71bd2fa4b00edffc268cec0e050 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 9 Nov 2010 20:01:58 +0100
Subject: [g3dvl] start handling motion_vertical_field_select

---
 src/gallium/include/pipe/p_video_state.h       | 1 +
 src/gallium/state_trackers/xorg/xvmc/surface.c | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 5eb96352139..e5e85e3deb3 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -89,6 +89,7 @@ struct pipe_mpeg12_macroblock
    enum pipe_mpeg12_motion_type mo_type;
    enum pipe_mpeg12_dct_type dct_type;
    signed pmv[2][2][2];
+   bool mvfs[2][2];
    unsigned cbp;
    short *blocks;
 };
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 9709f2b2373..0a236e26429 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -181,6 +181,11 @@ MacroBlocksToPipe(struct pipe_screen *screen,
             for (l = 0; l < 2; ++l)
                pipe_macroblocks->pmv[j][k][l] = xvmc_mb->PMV[j][k][l];
 
+      pipe_macroblocks->mvfs[0][0] = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_FIRST_FORWARD;
+      pipe_macroblocks->mvfs[0][1] = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_FIRST_BACKWARD;
+      pipe_macroblocks->mvfs[1][0] = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_SECOND_FORWARD;
+      pipe_macroblocks->mvfs[1][1] = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_SECOND_BACKWARD;
+
       pipe_macroblocks->cbp = xvmc_mb->coded_block_pattern;
       pipe_macroblocks->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
 
-- 
cgit v1.2.3


From 1eade3271ab167a83fcb0d8994530ea677d68575 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 9 Nov 2010 23:18:33 +0100
Subject: [g3dvl] workaround for motion vertical field selection

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 31 ++++++++++++++++++------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index e24186ea6db..975f0c1d10f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -911,8 +911,13 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
             }
          }
          else {
+            mo_vec[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
+
             mo_vec[1].x = mb->pmv[1][1][0];
-            mo_vec[1].y = mb->pmv[1][1][1];
+            mo_vec[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
+
+            if(mb->mvfs[0][1]) mo_vec[0].y += 2;
+            if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
 
             for (i = 0; i < 24 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
@@ -934,21 +939,31 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
          vb = ref_vb[0] + pos * 2 * 24;
 
          if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
-             mo_vec[0].x = mb->pmv[0][1][0];
-             mo_vec[0].y = mb->pmv[0][1][1];
+            mo_vec[0].x = mb->pmv[0][1][0];
+            mo_vec[0].y = mb->pmv[0][1][1];
+
+            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
+               mo_vec[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
 
-             if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
-                mo_vec[1].x = mb->pmv[1][1][0];
-                mo_vec[1].y = mb->pmv[1][1][1];
-             }
+               mo_vec[1].x = mb->pmv[1][1][0];
+               mo_vec[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
+
+               if(mb->mvfs[0][1]) mo_vec[0].y += 2;
+               if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
+            }
          }
          else {
             mo_vec[0].x = mb->pmv[0][0][0];
             mo_vec[0].y = mb->pmv[0][0][1];
 
             if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
+               mo_vec[0].y = mb->pmv[0][0][1] - (mb->pmv[0][0][1] % 4);
+
                mo_vec[1].x = mb->pmv[1][0][0];
-               mo_vec[1].y = mb->pmv[1][0][1];
+               mo_vec[1].y = mb->pmv[1][0][1] - (mb->pmv[1][0][1] % 4);
+
+               if(mb->mvfs[0][0]) mo_vec[0].y += 2;
+               if(!mb->mvfs[1][0]) mo_vec[1].y -= 2;
             }
          }
 
-- 
cgit v1.2.3


From c2ec28be3a659ae68b7c0956cd91c5de60d413aa Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 10 Nov 2010 15:24:28 +0100
Subject: [g3dvl] again rework vertex shader a bit

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 51 ++++++++++++++++--------
 1 file changed, 34 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 975f0c1d10f..3dd9c7d4117 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -84,9 +84,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    struct ureg_program *shader;
    struct ureg_src norm, mbs;
    struct ureg_src vpos, vtex[3], vmv[4];
-   struct ureg_dst tmp;
+   struct ureg_dst t_vpos, scale;
    struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_line;
    unsigned i, j, count;
+   bool interlaced = mv_per_frame == 2;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
@@ -94,7 +95,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    norm = ureg_DECL_constant(shader, 0);
    mbs = ureg_imm2f(shader, MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT);
-   tmp = ureg_DECL_temporary(shader);
+
+   t_vpos = ureg_DECL_temporary(shader);
+   scale = ureg_DECL_temporary(shader);
 
    vpos = ureg_DECL_vs_input(shader, 0);
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
@@ -121,33 +124,47 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
 
    /*
-    * o_vpos = vpos * norm * mbs
-    * o_vtex[0..2] = vtex[0..2] * norm * mbs
-    * o_vmv[0..count] = o_vpos + vmv[0..4] * norm * 0.5 // Apply motion vector
-    * o_line.x = 1
-    * o_line.y = vpos.y * 8
+    * scale = norm * mbs;
+    *
+    * t_vpos = vpos * scale
+    * o_vpos = t_vpos
+    *
+    * o_vtex[0..2] = vtex[0..2] * scale
+    *
+    * if(count > 0) { // Apply motion vectors
+    *    scale = norm * 0.5;
+    *    o_vmv[0..count] = t_vpos + vmv[0..4] * scale
+    * }
+    *
+    * if(interlaced) {
+    *    o_line.x = 1
+    *    o_line.y = vpos.y * 8
+    * }
     */
-   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, mbs);
-   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), norm);
+   ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, mbs);
+
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, ureg_src(scale));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
    for (i = 0; i < 3; ++i) {
-      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vtex[i], mbs);
-      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(tmp), norm);
+      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), vtex[i], ureg_src(scale));
    }
 
-   for (i = 0; i < count; ++i) {
-      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vmv[i], 
-         ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
-      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(tmp), norm, ureg_src(o_vpos));
+   if(count > 0) {
+      ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
+      for (i = 0; i < count; ++i)
+         ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(scale), vmv[i], ureg_src(t_vpos));
    }
 
-   if (mv_per_frame == 2) {
+   if (interlaced) {
       ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
       ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vpos, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
    }
 
-   ureg_release_temporary(shader, tmp);
+   ureg_release_temporary(shader, t_vpos);
+   ureg_release_temporary(shader, scale);
+
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, r->pipe);
-- 
cgit v1.2.3


From d073fec718bbc8f0f520b12d568b9828dd164042 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 10 Nov 2010 21:35:55 +0100
Subject: [g3dvl] move to 3D textures for y cb cr

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 178 +++++++++++------------
 1 file changed, 84 insertions(+), 94 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 3dd9c7d4117..f1f7567e2e9 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -148,7 +148,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
    for (i = 0; i < 3; ++i) {
-      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), vtex[i], ureg_src(scale));
+      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XYZ), vtex[i], ureg_src(scale));
    }
 
    if(count > 0) {
@@ -170,43 +170,59 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static void *
-create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
+static struct ureg_dst
+fetch_ycbcr(struct ureg_program *shader)
 {
-   struct ureg_program *shader;
    struct ureg_src tc[3];
    struct ureg_src sampler[3];
    struct ureg_dst texel, tmp;
-   struct ureg_dst fragment;
    unsigned i;
 
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
+   texel = ureg_DECL_temporary(shader);
+   tmp = ureg_DECL_temporary(shader);
 
    for (i = 0; i < 3; ++i)  {
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
       sampler[i] = ureg_DECL_sampler(shader, i);
    }
-   texel = ureg_DECL_temporary(shader);
-   tmp = ureg_DECL_temporary(shader);
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * texel.r = tex(tc[0], sampler[0])
-    * texel.g = tex(tc[1], sampler[1])
-    * texel.b = tex(tc[2], sampler[2])
-    * fragment = texel * scale
+    * texel.y  = tex(tc[0], sampler[0])
+    * texel.cb = tex(tc[1], sampler[1])
+    * texel.cr = tex(tc[2], sampler[2])
     */
    for (i = 0; i < 3; ++i) {
       /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
+      ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, tc[i], sampler[i]);
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
    }
+
+   ureg_release_temporary(shader, tmp);
+
+   return texel;
+}
+
+static void *
+create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   struct ureg_program *shader;
+   struct ureg_dst texel;
+   struct ureg_dst fragment;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * texel = fetch_ycbcr()
+    * fragment = texel * scale
+    */
+   texel = fetch_ycbcr(shader);
    ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
 
    ureg_release_temporary(shader, texel);
-   ureg_release_temporary(shader, tmp);
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, r->pipe);
@@ -216,37 +232,28 @@ static void *
 create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[4];
-   struct ureg_src sampler[4];
+   struct ureg_src tc;
+   struct ureg_src sampler;
    struct ureg_dst texel, ref;
    struct ureg_dst fragment;
-   unsigned i;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return NULL;
 
-   for (i = 0; i < 4; ++i)  {
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
-      sampler[i] = ureg_DECL_sampler(shader, i);
-   }
-   texel = ureg_DECL_temporary(shader);
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 4, TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 3);
+
    ref = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * texel.r = tex(tc[0], sampler[0])
-    * texel.g = tex(tc[1], sampler[1])
-    * texel.b = tex(tc[2], sampler[2])
-    * ref = tex(tc[3], sampler[3])
+    * texel = fetch_ycbcr()
+    * ref = tex(tc, sampler)
     * fragment = texel * scale + ref
     */
-   for (i = 0; i < 3; ++i) {
-      /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[i], sampler[i]);
-      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_X));
-   }
-   ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
+   texel = fetch_ycbcr(shader);
+   ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc, sampler);
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
 
    ureg_release_temporary(shader, texel);
@@ -260,8 +267,8 @@ static void *
 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[5], line;
-   struct ureg_src sampler[4];
+   struct ureg_src tc[2], line;
+   struct ureg_src sampler;
    struct ureg_dst texel, ref, tmp;
    struct ureg_dst fragment;
    unsigned i, label;
@@ -270,39 +277,32 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    if (!shader)
       return NULL;
 
-   for (i = 0; i < 5; ++i)
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+   for (i = 0; i < 2; ++i)
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 4, TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 3);
+
    line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 6, TGSI_INTERPOLATE_LINEAR);
-   for (i = 0; i < 4; ++i)
-      sampler[i] = ureg_DECL_sampler(shader, i);
 
-   texel = ureg_DECL_temporary(shader);
    ref = ureg_DECL_temporary(shader);
    tmp = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * texel.r = tex(tc[0], sampler[0])
-    * texel.g = tex(tc[1], sampler[1])
-    * texel.b = tex(tc[2], sampler[2])
+    * texel = fetch_ycbcr()
     * if(line % 2)
-    *    ref = tex(tc[4], sampler[3])
+    *    ref = tex(tc[1], sampler)
     * else
-    *    ref = tex(tc[3], sampler[3])
+    *    ref = tex(tc[0], sampler)
     * fragment = texel * scale + ref
     */
-   for (i = 0; i < 3; ++i) {
-      /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
-      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
-   }
+   texel = fetch_ycbcr(shader);
 
    ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
    ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
-      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[4], sampler[3]);
+      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[1], sampler);
    ureg_ELSE(shader, &label);
-      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[3], sampler[3]);
+      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[0], sampler);
    ureg_ENDIF(shader);
 
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
@@ -319,9 +319,9 @@ static void *
 create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[5];
-   struct ureg_src sampler[5];
-   struct ureg_dst texel, ref[2], tmp;
+   struct ureg_src tc[2];
+   struct ureg_src sampler[2];
+   struct ureg_dst texel, ref[2];
    struct ureg_dst fragment;
    unsigned i;
 
@@ -329,36 +329,28 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    if (!shader)
       return NULL;
 
-   for (i = 0; i < 5; ++i)  {
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
-      sampler[i] = ureg_DECL_sampler(shader, i);
+   for (i = 0; i < 2; ++i)  {
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 4, TGSI_INTERPOLATE_LINEAR);
+      sampler[i] = ureg_DECL_sampler(shader, i + 3);
    }
-   texel = ureg_DECL_temporary(shader);
+
    ref[0] = ureg_DECL_temporary(shader);
    ref[1] = ureg_DECL_temporary(shader);
-   tmp = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * texel.r = tex(tc[0], sampler[0])
-    * texel.g = tex(tc[1], sampler[1])
-    * texel.b = tex(tc[2], sampler[2])
+    * texel = fetch_ycbcr()
     * ref[0..1 = tex(tc[3..4], sampler[3..4])
     * ref[0] = lerp(ref[0], ref[1], 0.5)
     * fragment = texel * scale + ref[0]
     */
-   for (i = 0; i < 3; ++i) {
-      /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[i], sampler[i]);
-      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
-   }
-   ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
-   ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[4], sampler[4]);
+   texel = fetch_ycbcr(shader);
+   ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
+   ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
    ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
 
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
 
-   ureg_release_temporary(shader, tmp);
    ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, ref[0]);
    ureg_release_temporary(shader, ref[1]);
@@ -371,8 +363,8 @@ static void *
 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[7], line;
-   struct ureg_src sampler[5];
+   struct ureg_src tc[4], line;
+   struct ureg_src sampler[2];
    struct ureg_dst texel, ref[2], tmp;
    struct ureg_dst fragment;
    unsigned i, label;
@@ -381,11 +373,12 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    if (!shader)
       return NULL;
 
-   for (i = 0; i < 7; ++i)
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+   for (i = 0; i < 4; ++i)
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 4, TGSI_INTERPOLATE_LINEAR);
+   for (i = 0; i < 2; ++i)
+      sampler[i] = ureg_DECL_sampler(shader, i + 3);
+
    line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 8, TGSI_INTERPOLATE_LINEAR);
-   for (i = 0; i < 5; ++i)
-      sampler[i] = ureg_DECL_sampler(shader, i);
 
    texel = ureg_DECL_temporary(shader);
    ref[0] = ureg_DECL_temporary(shader);
@@ -394,9 +387,7 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * texel.r = tex(tc[0], sampler[0])
-    * texel.g = tex(tc[1], sampler[1])
-    * texel.b = tex(tc[2], sampler[2])
+    * texel = fetch_ycbcr()
     * if(line % 2)
     *    ref[0..1] = tex(tc[4|6], sampler[3..4])
     * else
@@ -404,20 +395,16 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * ref[0] = lerp(ref[0], ref[1], 0.5)
     * fragment = texel * scale + ref[0]
     */
-   for (i = 0; i < 3; ++i) {
-      /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[i], sampler[i]);
-      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X));
-   }
+   texel = fetch_ycbcr(shader);
 
    ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
    ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
-      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[4], sampler[3]);
-      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[6], sampler[4]);
+      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
+      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[3], sampler[1]);
    ureg_ELSE(shader, &label);
-      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[3], sampler[3]);
-      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[5], sampler[4]);
+      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
+      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[2], sampler[1]);
    ureg_ENDIF(shader);
 
    ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
@@ -521,7 +508,7 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
       memset(&sampler, 0, sizeof(sampler));
       sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
       sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
       sampler.min_img_filter = filters[i];
       sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
       sampler.mag_img_filter = filters[i];
@@ -532,7 +519,10 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
       /*sampler.lod_bias = ; */
       sampler.min_lod = 0;
       /*sampler.max_lod = ; */
-      /*sampler.border_color[i] = ; */
+      sampler.border_color[0] = 0.0f;
+      sampler.border_color[1] = 0.0f;
+      sampler.border_color[2] = 0.0f;
+      sampler.border_color[3] = 0.0f;
       /*sampler.max_anisotropy = ; */
       r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
    }
@@ -611,7 +601,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
 
    memset(&template, 0, sizeof(struct pipe_resource));
-   template.target = PIPE_TEXTURE_2D;
+   template.target = PIPE_TEXTURE_3D;
    /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
    template.format = PIPE_FORMAT_R16_SNORM;
    template.last_level = 0;
-- 
cgit v1.2.3


From 3a247a08e8248ce4128952b78abcd205e42e67c1 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 10 Nov 2010 23:22:56 +0100
Subject: [g3dvl] use clamp to border for empty block handling

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 63 ++++++++++++------------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index f1f7567e2e9..ff71e4f21c3 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -148,7 +148,8 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
    for (i = 0; i < 3; ++i) {
-      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XYZ), vtex[i], ureg_src(scale));
+      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), vpos, ureg_src(scale));
+      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_scalar(vtex[i], TGSI_SWIZZLE_X));
    }
 
    if(count > 0) {
@@ -816,70 +817,70 @@ gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
       for this channel is defined for this block) */
 
    if (!use_zeroblocks || cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
-      v.x = mbx + offset->x;
-      v.y = mby + offset->y;
+      v.x = 0.0f;
+      v.y = 0.0f;
    }
    else {
-      v.x = zero_blocks[0].x;
-      v.y = zero_blocks[0].y;
+      v.x = -1.0f;
+      v.y = 0.0f;
    }
 
    vb[0].luma_tc.x = v.x;
    vb[0].luma_tc.y = v.y;
    vb[1].luma_tc.x = v.x;
-   vb[1].luma_tc.y = v.y + half.y;
-   vb[2].luma_tc.x = v.x + half.x;
+   vb[1].luma_tc.y = v.y;// + half.y;
+   vb[2].luma_tc.x = v.x;// + half.x;
    vb[2].luma_tc.y = v.y;
-   vb[3].luma_tc.x = v.x + half.x;
+   vb[3].luma_tc.x = v.x;// + half.x;
    vb[3].luma_tc.y = v.y;
    vb[4].luma_tc.x = v.x;
-   vb[4].luma_tc.y = v.y + half.y;
-   vb[5].luma_tc.x = v.x + half.x;
-   vb[5].luma_tc.y = v.y + half.y;
+   vb[4].luma_tc.y = v.y;// + half.y;
+   vb[5].luma_tc.x = v.x;// + half.x;
+   vb[5].luma_tc.y = v.y;// + half.y;
 
    if (!use_zeroblocks || cbp & cb_mask) {
-      v.x = mbx + offset->x;
-      v.y = mby + offset->y;
+      v.x = 0.0f;
+      v.y = 0.0f;
    }
    else {
-      v.x = zero_blocks[1].x;
-      v.y = zero_blocks[1].y;
+      v.x = -1.0f;
+      v.y = 0.0f;
    }
 
    vb[0].cb_tc.x = v.x;
    vb[0].cb_tc.y = v.y;
    vb[1].cb_tc.x = v.x;
-   vb[1].cb_tc.y = v.y + half.y;
-   vb[2].cb_tc.x = v.x + half.x;
+   vb[1].cb_tc.y = v.y;// + half.y;
+   vb[2].cb_tc.x = v.x;// + half.x;
    vb[2].cb_tc.y = v.y;
-   vb[3].cb_tc.x = v.x + half.x;
+   vb[3].cb_tc.x = v.x;// + half.x;
    vb[3].cb_tc.y = v.y;
    vb[4].cb_tc.x = v.x;
-   vb[4].cb_tc.y = v.y + half.y;
-   vb[5].cb_tc.x = v.x + half.x;
-   vb[5].cb_tc.y = v.y + half.y;
+   vb[4].cb_tc.y = v.y;// + half.y;
+   vb[5].cb_tc.x = v.x;// + half.x;
+   vb[5].cb_tc.y = v.y;// + half.y;
 
    if (!use_zeroblocks || cbp & cr_mask) {
-      v.x = mbx + offset->x;
-      v.y = mby + offset->y;
+      v.x = 0.0f;
+      v.y = 0.0f;
    }
    else {
-      v.x = zero_blocks[2].x;
-      v.y = zero_blocks[2].y;
+      v.x = -1.0f;
+      v.y = 0.0f;
    }
 
    vb[0].cr_tc.x = v.x;
    vb[0].cr_tc.y = v.y;
    vb[1].cr_tc.x = v.x;
-   vb[1].cr_tc.y = v.y + half.y;
-   vb[2].cr_tc.x = v.x + half.x;
+   vb[1].cr_tc.y = v.y; // + half.y;
+   vb[2].cr_tc.x = v.x; // + half.x;
    vb[2].cr_tc.y = v.y;
-   vb[3].cr_tc.x = v.x + half.x;
+   vb[3].cr_tc.x = v.x; // + half.x;
    vb[3].cr_tc.y = v.y;
    vb[4].cr_tc.x = v.x;
-   vb[4].cr_tc.y = v.y + half.y;
-   vb[5].cr_tc.x = v.x + half.x;
-   vb[5].cr_tc.y = v.y + half.y;
+   vb[4].cr_tc.y = v.y; // + half.y;
+   vb[5].cr_tc.x = v.x; // + half.x;
+   vb[5].cr_tc.y = v.y; // + half.y;
 }
 
 static void
-- 
cgit v1.2.3


From 1be1aa7ba16b377c065cbb0b49ed4271604ce26a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 11 Nov 2010 00:39:30 +0100
Subject: [g3dvl] cleanup vert_stream_0

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 94 +++++++++---------------
 1 file changed, 35 insertions(+), 59 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index ff71e4f21c3..6843df052f7 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -60,9 +60,9 @@ struct fragment_shader_consts
 struct vert_stream_0
 {
    struct vertex2f pos;
-   struct vertex2f luma_tc;
-   struct vertex2f cb_tc;
-   struct vertex2f cr_tc;
+   float luma_eb;
+   float cb_eb;
+   float cr_eb;
 };
 
 enum MACROBLOCK_TYPE
@@ -642,7 +642,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
    }
 
-   r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f) * 4;
+   r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vert_stream_0);
    r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
    r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
    /* XXX: Create with usage DYNAMIC or STREAM */
@@ -650,7 +650,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    (
       r->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex2f) * 4 * 24 * r->macroblocks_per_batch
+      sizeof(struct vert_stream_0) * 24 * r->macroblocks_per_batch
    );
 
    for (i = 1; i < 3; ++i) {
@@ -678,19 +678,19 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[1].src_offset = sizeof(struct vertex2f);
    vertex_elems[1].instance_divisor = 0;
    vertex_elems[1].vertex_buffer_index = 0;
-   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32_FLOAT;
 
    /* Chroma Cr texcoord element */
-   vertex_elems[2].src_offset = sizeof(struct vertex2f) * 2;
+   vertex_elems[2].src_offset = sizeof(struct vertex2f) + sizeof(float);
    vertex_elems[2].instance_divisor = 0;
    vertex_elems[2].vertex_buffer_index = 0;
-   vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[2].src_format = PIPE_FORMAT_R32_FLOAT;
 
    /* Chroma Cb texcoord element */
-   vertex_elems[3].src_offset = sizeof(struct vertex2f) * 3;
+   vertex_elems[3].src_offset = sizeof(struct vertex2f) + sizeof(float) * 2;
    vertex_elems[3].instance_divisor = 0;
    vertex_elems[3].vertex_buffer_index = 0;
-   vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[3].src_format = PIPE_FORMAT_R32_FLOAT;
 
    /* First ref surface top field texcoord element */
    vertex_elems[4].src_offset = 0;
@@ -774,7 +774,7 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
    return -1;
 }
 
-static void
+void
 gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
                 const struct vertex2f *offset,
                 unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
@@ -818,72 +818,48 @@ gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
 
    if (!use_zeroblocks || cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
       v.x = 0.0f;
-      v.y = 0.0f;
    }
    else {
       v.x = -1.0f;
-      v.y = 0.0f;
    }
 
-   vb[0].luma_tc.x = v.x;
-   vb[0].luma_tc.y = v.y;
-   vb[1].luma_tc.x = v.x;
-   vb[1].luma_tc.y = v.y;// + half.y;
-   vb[2].luma_tc.x = v.x;// + half.x;
-   vb[2].luma_tc.y = v.y;
-   vb[3].luma_tc.x = v.x;// + half.x;
-   vb[3].luma_tc.y = v.y;
-   vb[4].luma_tc.x = v.x;
-   vb[4].luma_tc.y = v.y;// + half.y;
-   vb[5].luma_tc.x = v.x;// + half.x;
-   vb[5].luma_tc.y = v.y;// + half.y;
+   vb[0].luma_eb = v.x;
+   vb[1].luma_eb = v.x;
+   vb[2].luma_eb = v.x;
+   vb[3].luma_eb = v.x;
+   vb[4].luma_eb = v.x;
+   vb[5].luma_eb = v.x;
 
    if (!use_zeroblocks || cbp & cb_mask) {
       v.x = 0.0f;
-      v.y = 0.0f;
    }
    else {
       v.x = -1.0f;
-      v.y = 0.0f;
    }
 
-   vb[0].cb_tc.x = v.x;
-   vb[0].cb_tc.y = v.y;
-   vb[1].cb_tc.x = v.x;
-   vb[1].cb_tc.y = v.y;// + half.y;
-   vb[2].cb_tc.x = v.x;// + half.x;
-   vb[2].cb_tc.y = v.y;
-   vb[3].cb_tc.x = v.x;// + half.x;
-   vb[3].cb_tc.y = v.y;
-   vb[4].cb_tc.x = v.x;
-   vb[4].cb_tc.y = v.y;// + half.y;
-   vb[5].cb_tc.x = v.x;// + half.x;
-   vb[5].cb_tc.y = v.y;// + half.y;
+   vb[0].cb_eb = v.x;
+   vb[1].cb_eb = v.x;
+   vb[2].cb_eb = v.x;
+   vb[3].cb_eb = v.x;
+   vb[4].cb_eb = v.x;
+   vb[5].cb_eb = v.x;
 
    if (!use_zeroblocks || cbp & cr_mask) {
       v.x = 0.0f;
-      v.y = 0.0f;
    }
    else {
       v.x = -1.0f;
-      v.y = 0.0f;
    }
 
-   vb[0].cr_tc.x = v.x;
-   vb[0].cr_tc.y = v.y;
-   vb[1].cr_tc.x = v.x;
-   vb[1].cr_tc.y = v.y; // + half.y;
-   vb[2].cr_tc.x = v.x; // + half.x;
-   vb[2].cr_tc.y = v.y;
-   vb[3].cr_tc.x = v.x; // + half.x;
-   vb[3].cr_tc.y = v.y;
-   vb[4].cr_tc.x = v.x;
-   vb[4].cr_tc.y = v.y; // + half.y;
-   vb[5].cr_tc.x = v.x; // + half.x;
-   vb[5].cr_tc.y = v.y; // + half.y;
+   vb[0].cr_eb = v.x;
+   vb[1].cr_eb = v.x;
+   vb[2].cr_eb = v.x;
+   vb[3].cr_eb = v.x;
+   vb[4].cr_eb = v.x;
+   vb[5].cr_eb = v.x;
 }
 
-static void
+void
 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
                      struct pipe_mpeg12_macroblock *mb, unsigned pos,
                      struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
@@ -1284,12 +1260,12 @@ grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
 static void
 fill_frame_zero_block(short *dst, unsigned dst_pitch)
 {
-   unsigned y;
-
-   assert(dst);
+   //unsigned y;
+   //
+   //assert(dst);
 
-   for (y = 0; y < BLOCK_HEIGHT; ++y)
-      memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
+   //for (y = 0; y < BLOCK_HEIGHT; ++y)
+   //   memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
 }
 
 static void
-- 
cgit v1.2.3


From 4c90c039f005517c5ae68167d9b3de624557c817 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 11 Nov 2010 11:53:12 +0100
Subject: [g3dvl] move the rest of the calculations into the vertex shader

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 258 +++++++++++------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   4 +-
 2 files changed, 124 insertions(+), 138 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 6843df052f7..7cd6934e8fa 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -78,12 +78,29 @@ enum MACROBLOCK_TYPE
    NUM_MACROBLOCK_TYPES
 };
 
+/* vertices for two triangles covering four block */
+static const struct vertex2f const_rectangle[4][2][3] = {
+   {
+      { {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f} },
+      { {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f} }
+   }, {
+      { {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f} },
+      { {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f} }
+   }, {
+      { {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f} },
+      { {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f} }
+   }, {
+      { {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f} },
+      { {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} }
+   }
+};
+
 static void *
 create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
 {
    struct ureg_program *shader;
    struct ureg_src norm, mbs;
-   struct ureg_src vpos, vtex[3], vmv[4];
+   struct ureg_src vrect, vpos, vtex[3], vmv[4];
    struct ureg_dst t_vpos, scale;
    struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_line;
    unsigned i, j, count;
@@ -99,11 +116,12 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    t_vpos = ureg_DECL_temporary(shader);
    scale = ureg_DECL_temporary(shader);
 
-   vpos = ureg_DECL_vs_input(shader, 0);
+   vrect = ureg_DECL_vs_input(shader, 0);
+   vpos = ureg_DECL_vs_input(shader, 1);
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
 
    for (i = 0; i < 3; ++i) {
-      vtex[i] = ureg_DECL_vs_input(shader, 1 + i);
+      vtex[i] = ureg_DECL_vs_input(shader, 2 + i);
       o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1 + i);
    }
    
@@ -111,13 +129,13 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    for (i = 0; i < ref_frames; ++i) {
       for (j = 0; j < 2; ++j) {        
         if(j < mv_per_frame) {
-           vmv[count] = ureg_DECL_vs_input(shader, 4 + i * 2 + j);
+           vmv[count] = ureg_DECL_vs_input(shader, 5 + i * 2 + j);
            o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
            count++;
         }
         /* workaround for r600g */
         else if(ref_frames == 2)
-           ureg_DECL_vs_input(shader, 4 + i * 2 + j);
+           ureg_DECL_vs_input(shader, 5 + i * 2 + j);
       }
    }
 
@@ -126,10 +144,12 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    /*
     * scale = norm * mbs;
     *
-    * t_vpos = vpos * scale
-    * o_vpos = t_vpos
+    * t_vpos = (vpos + vrect) * scale
+    * o_vpos.xy = t_vpos
+    * o_vpos.zw = vpos
     *
-    * o_vtex[0..2] = vtex[0..2] * scale
+    * o_vtex[0..2].xy = t_vpos
+    * o_vtex[0..2].z = vtex[0..2].z
     *
     * if(count > 0) { // Apply motion vectors
     *    scale = norm * 0.5;
@@ -143,12 +163,13 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
     */
    ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, mbs);
 
-   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, ureg_src(scale));
+   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), ureg_src(scale));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
    for (i = 0; i < 3; ++i) {
-      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), vpos, ureg_src(scale));
+      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
       ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_scalar(vtex[i], TGSI_SWIZZLE_X));
    }
 
@@ -160,7 +181,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    if (interlaced) {
       ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
-      ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vpos, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
+      ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
    }
 
    ureg_release_temporary(shader, t_vpos);
@@ -584,7 +605,7 @@ static bool
 init_buffers(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_resource template;
-   struct pipe_vertex_element vertex_elems[8];
+   struct pipe_vertex_element vertex_elems[9];
    struct pipe_sampler_view sampler_view;
 
    const unsigned mbw =
@@ -642,6 +663,16 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
    }
 
+   r->vertex_bufs.individual.rect.stride = sizeof(struct vertex2f);
+   r->vertex_bufs.individual.rect.max_index = 24 * r->macroblocks_per_batch - 1;
+   r->vertex_bufs.individual.rect.buffer_offset = 0;
+   r->vertex_bufs.individual.rect.buffer = pipe_buffer_create
+   (
+      r->pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      sizeof(struct vertex2f) * 24 * r->macroblocks_per_batch
+   );
+
    r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vert_stream_0);
    r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
    r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
@@ -653,12 +684,12 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       sizeof(struct vert_stream_0) * 24 * r->macroblocks_per_batch
    );
 
-   for (i = 1; i < 3; ++i) {
-      r->vertex_bufs.all[i].stride = sizeof(struct vertex2f) * 2;
-      r->vertex_bufs.all[i].max_index = 24 * r->macroblocks_per_batch - 1;
-      r->vertex_bufs.all[i].buffer_offset = 0;
+   for (i = 0; i < 2; ++i) {
+      r->vertex_bufs.individual.ref[i].stride = sizeof(struct vertex2f) * 2;
+      r->vertex_bufs.individual.ref[i].max_index = 24 * r->macroblocks_per_batch - 1;
+      r->vertex_bufs.individual.ref[i].buffer_offset = 0;
       /* XXX: Create with usage DYNAMIC or STREAM */
-      r->vertex_bufs.all[i].buffer = pipe_buffer_create
+      r->vertex_bufs.individual.ref[i].buffer = pipe_buffer_create
       (
          r->pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
@@ -668,57 +699,63 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
 
    memset(&vertex_elems, 0, sizeof(vertex_elems));
 
-   /* Position element */
+   /* Rectangle element */
    vertex_elems[0].src_offset = 0;
    vertex_elems[0].instance_divisor = 0;
    vertex_elems[0].vertex_buffer_index = 0;
    vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* Luma, texcoord element */
-   vertex_elems[1].src_offset = sizeof(struct vertex2f);
+   /* Position element */
+   vertex_elems[1].src_offset = 0;
    vertex_elems[1].instance_divisor = 0;
-   vertex_elems[1].vertex_buffer_index = 0;
-   vertex_elems[1].src_format = PIPE_FORMAT_R32_FLOAT;
+   vertex_elems[1].vertex_buffer_index = 1;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* Chroma Cr texcoord element */
-   vertex_elems[2].src_offset = sizeof(struct vertex2f) + sizeof(float);
+   /* Luma, texcoord element */
+   vertex_elems[2].src_offset = sizeof(struct vertex2f);
    vertex_elems[2].instance_divisor = 0;
-   vertex_elems[2].vertex_buffer_index = 0;
+   vertex_elems[2].vertex_buffer_index = 1;
    vertex_elems[2].src_format = PIPE_FORMAT_R32_FLOAT;
 
-   /* Chroma Cb texcoord element */
-   vertex_elems[3].src_offset = sizeof(struct vertex2f) + sizeof(float) * 2;
+   /* Chroma Cr texcoord element */
+   vertex_elems[3].src_offset = sizeof(struct vertex2f) + sizeof(float);
    vertex_elems[3].instance_divisor = 0;
-   vertex_elems[3].vertex_buffer_index = 0;
+   vertex_elems[3].vertex_buffer_index = 1;
    vertex_elems[3].src_format = PIPE_FORMAT_R32_FLOAT;
 
-   /* First ref surface top field texcoord element */
-   vertex_elems[4].src_offset = 0;
+   /* Chroma Cb texcoord element */
+   vertex_elems[4].src_offset = sizeof(struct vertex2f) + sizeof(float) * 2;
    vertex_elems[4].instance_divisor = 0;
    vertex_elems[4].vertex_buffer_index = 1;
-   vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[4].src_format = PIPE_FORMAT_R32_FLOAT;
 
-   /* First ref surface bottom field texcoord element */
-   vertex_elems[5].src_offset = sizeof(struct vertex2f);
+   /* First ref surface top field texcoord element */
+   vertex_elems[5].src_offset = 0;
    vertex_elems[5].instance_divisor = 0;
-   vertex_elems[5].vertex_buffer_index = 1;
+   vertex_elems[5].vertex_buffer_index = 2;
    vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* Second ref surface top field texcoord element */
-   vertex_elems[6].src_offset = 0;
+   /* First ref surface bottom field texcoord element */
+   vertex_elems[6].src_offset = sizeof(struct vertex2f);
    vertex_elems[6].instance_divisor = 0;
    vertex_elems[6].vertex_buffer_index = 2;
    vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* Second ref surface bottom field texcoord element */
-   vertex_elems[7].src_offset = sizeof(struct vertex2f);
+   /* Second ref surface top field texcoord element */
+   vertex_elems[7].src_offset = 0;
    vertex_elems[7].instance_divisor = 0;
-   vertex_elems[7].vertex_buffer_index = 2;
+   vertex_elems[7].vertex_buffer_index = 3;
    vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 4, vertex_elems);
-   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 6, vertex_elems);
-   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 8, vertex_elems);
+   /* Second ref surface bottom field texcoord element */
+   vertex_elems[8].src_offset = sizeof(struct vertex2f);
+   vertex_elems[8].instance_divisor = 0;
+   vertex_elems[8].vertex_buffer_index = 3;
+   vertex_elems[8].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 5, vertex_elems);
+   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
+   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 9, vertex_elems);
 
    r->vs_const_buf = pipe_buffer_create
    (
@@ -776,87 +813,40 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
 
 void
 gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
-                const struct vertex2f *offset,
-                unsigned luma_mask, unsigned cb_mask, unsigned cr_mask,
-                bool use_zeroblocks, struct vertex2f *zero_blocks)
+                unsigned luma_mask, unsigned cb_mask, unsigned cr_mask)
 {
    unsigned cbp = mb->cbp;
-   unsigned mbx = mb->mbx;
-   unsigned mby = mb->mby;
-
-   const struct vertex2f half =
-   {
-      0.5f,
-      0.5f
-   };
-   struct vertex2f v;
+   unsigned i;
+   struct vert_stream_0 v;
 
    assert(vb);
-   assert(offset);
-   assert(zero_blocks || !use_zeroblocks);
-
-   /* Generate vertices for two triangles covering a block */
-   v.x = mbx + offset->x;
-   v.y = mby + offset->y;
-
-   vb[0].pos.x = v.x;
-   vb[0].pos.y = v.y;
-   vb[1].pos.x = v.x;
-   vb[1].pos.y = v.y + half.y;
-   vb[2].pos.x = v.x + half.x;
-   vb[2].pos.y = v.y;
-   vb[3].pos.x = v.x + half.x;
-   vb[3].pos.y = v.y;
-   vb[4].pos.x = v.x;
-   vb[4].pos.y = v.y + half.y;
-   vb[5].pos.x = v.x + half.x;
-   vb[5].pos.y = v.y + half.y;
-
-   /* Generate texcoords for the triangles, either pointing to the correct area on the luma/chroma texture
-      or if zero blocks are being used, to the zero block if the appropriate CBP bits aren't set (i.e. no data
-      for this channel is defined for this block) */
-
-   if (!use_zeroblocks || cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
-      v.x = 0.0f;
+
+   v.pos.x = mb->mbx;
+   v.pos.y = mb->mby;
+
+   if (cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
+      v.luma_eb = 0.0f;
    }
    else {
-      v.x = -1.0f;
+      v.luma_eb = -1.0f;
    }
 
-   vb[0].luma_eb = v.x;
-   vb[1].luma_eb = v.x;
-   vb[2].luma_eb = v.x;
-   vb[3].luma_eb = v.x;
-   vb[4].luma_eb = v.x;
-   vb[5].luma_eb = v.x;
-
-   if (!use_zeroblocks || cbp & cb_mask) {
-      v.x = 0.0f;
+   if (cbp & cb_mask) {
+      v.cb_eb = 0.0f;
    }
    else {
-      v.x = -1.0f;
+      v.cb_eb = -1.0f;
    }
 
-   vb[0].cb_eb = v.x;
-   vb[1].cb_eb = v.x;
-   vb[2].cb_eb = v.x;
-   vb[3].cb_eb = v.x;
-   vb[4].cb_eb = v.x;
-   vb[5].cb_eb = v.x;
-
-   if (!use_zeroblocks || cbp & cr_mask) {
-      v.x = 0.0f;
+   if (cbp & cr_mask) {
+      v.cr_eb = 0.0f;
    }
    else {
-      v.x = -1.0f;
+      v.cr_eb = -1.0f;
    }
 
-   vb[0].cr_eb = v.x;
-   vb[1].cr_eb = v.x;
-   vb[2].cr_eb = v.x;
-   vb[3].cr_eb = v.x;
-   vb[4].cr_eb = v.x;
-   vb[5].cr_eb = v.x;
+   for ( i = 0; i < 6; ++i )
+     memcpy(vb + i, &v, sizeof(v));
 }
 
 void
@@ -970,30 +960,12 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
       }
       case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
       {
-         const struct vertex2f offsets[2][2] =
-         {
-            {
-               {0, 0}, {0, 0.5f}
-            },
-            {
-               {0.5f, 0}, {0.5f, 0.5f}
-            }
-         };
-         const bool use_zb = r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE;
-
          struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
 
-         gen_block_verts(vb, mb, &offsets[0][0],
-                         32, 2, 1, use_zb, r->zero_block);
-
-         gen_block_verts(vb + 6, mb, &offsets[1][0],
-                         16, 2, 1, use_zb, r->zero_block);
-
-         gen_block_verts(vb + 12, mb, &offsets[0][1],
-                         8, 2, 1, use_zb, r->zero_block);
-
-         gen_block_verts(vb + 18, mb, &offsets[1][1],
-                         4, 2, 1, use_zb, r->zero_block);
+         gen_block_verts(vb, mb, 32, 2, 1);
+         gen_block_verts(vb + 6, mb, 16, 2, 1);
+         gen_block_verts(vb + 12, mb, 8, 2, 1);
+         gen_block_verts(vb + 18, mb, 4, 2, 1);
 
          break;
       }
@@ -1090,11 +1062,25 @@ flush(struct vl_mpeg12_mc_renderer *r)
    unsigned vb_start = 0;
    struct vertex_shader_consts *vs_consts;
    struct pipe_transfer *buf_transfer;
+   struct vertex2f *rect;
    unsigned i;
 
    assert(r);
    assert(r->num_macroblocks == r->macroblocks_per_batch);
 
+   rect = pipe_buffer_map
+   (
+      r->pipe,
+      r->vertex_bufs.individual.rect.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   for ( i = 0; i < r->macroblocks_per_batch; ++i)
+     memcpy(rect + i * 24, &const_rectangle, sizeof(const_rectangle));
+   
+   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.rect.buffer, buf_transfer);
+
    gen_macroblock_stream(r, num_macroblocks);
 
    r->fb_state.cbufs[0] = r->surface;
@@ -1118,7 +1104,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
                                 r->vs_const_buf);
 
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 1, r->vertex_bufs.all);
+      r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
       r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
@@ -1131,7 +1117,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
+      r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->past->texture;
       r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
@@ -1146,7 +1132,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
+      r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->past->texture;
       r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
@@ -1161,7 +1147,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
+      r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->future->texture;
       r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
@@ -1176,7 +1162,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
+      r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->future->texture;
       r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
@@ -1191,7 +1177,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
+      r->pipe->set_vertex_buffers(r->pipe, 4, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
       r->textures.individual.ref[0] = r->past->texture;
       r->textures.individual.ref[1] = r->future->texture;
@@ -1208,7 +1194,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
+      r->pipe->set_vertex_buffers(r->pipe, 4, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
       r->textures.individual.ref[0] = r->past->texture;
       r->textures.individual.ref[1] = r->future->texture;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 3a56529aa6b..55441591988 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -94,8 +94,8 @@ struct vl_mpeg12_mc_renderer
 
    union
    {
-      struct pipe_vertex_buffer all[3];
-      struct { struct pipe_vertex_buffer ycbcr, ref[2]; } individual;
+      struct pipe_vertex_buffer all[4];
+      struct { struct pipe_vertex_buffer rect, ycbcr, ref[2]; } individual;
    } vertex_bufs;
 
    struct pipe_surface *surface, *past, *future;
-- 
cgit v1.2.3


From 745906257a5cfc5945e7d373ea2684ccf26860b5 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 11 Nov 2010 12:32:44 +0100
Subject: [g3dvl] use only one vertex element for ycbcr z-coord

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 65 ++++++++++--------------
 1 file changed, 26 insertions(+), 39 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 7cd6934e8fa..a6fe0661200 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -100,7 +100,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 {
    struct ureg_program *shader;
    struct ureg_src norm, mbs;
-   struct ureg_src vrect, vpos, vtex[3], vmv[4];
+   struct ureg_src vrect, vpos, vtex, vmv[4];
    struct ureg_dst t_vpos, scale;
    struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_line;
    unsigned i, j, count;
@@ -118,24 +118,23 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    vrect = ureg_DECL_vs_input(shader, 0);
    vpos = ureg_DECL_vs_input(shader, 1);
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   vtex = ureg_DECL_vs_input(shader, 2);
 
-   for (i = 0; i < 3; ++i) {
-      vtex[i] = ureg_DECL_vs_input(shader, 2 + i);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   for (i = 0; i < 3; ++i)
       o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1 + i);
-   }
    
    count=0;
    for (i = 0; i < ref_frames; ++i) {
       for (j = 0; j < 2; ++j) {        
         if(j < mv_per_frame) {
-           vmv[count] = ureg_DECL_vs_input(shader, 5 + i * 2 + j);
+           vmv[count] = ureg_DECL_vs_input(shader, 3 + i * 2 + j);
            o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
            count++;
         }
         /* workaround for r600g */
         else if(ref_frames == 2)
-           ureg_DECL_vs_input(shader, 5 + i * 2 + j);
+           ureg_DECL_vs_input(shader, 3 + i * 2 + j);
       }
    }
 
@@ -149,7 +148,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
     * o_vpos.zw = vpos
     *
     * o_vtex[0..2].xy = t_vpos
-    * o_vtex[0..2].z = vtex[0..2].z
+    * o_vtex[0..2].z = vtex[0..2].[xyz]
     *
     * if(count > 0) { // Apply motion vectors
     *    scale = norm * 0.5;
@@ -170,7 +169,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    for (i = 0; i < 3; ++i) {
       ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_scalar(vtex[i], TGSI_SWIZZLE_X));
+      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_X + i));
    }
 
    if(count > 0) {
@@ -605,7 +604,7 @@ static bool
 init_buffers(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_resource template;
-   struct pipe_vertex_element vertex_elems[9];
+   struct pipe_vertex_element vertex_elems[7];
    struct pipe_sampler_view sampler_view;
 
    const unsigned mbw =
@@ -711,51 +710,39 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[1].vertex_buffer_index = 1;
    vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* Luma, texcoord element */
+   /* y, cr, cb z-coordinate element */
    vertex_elems[2].src_offset = sizeof(struct vertex2f);
    vertex_elems[2].instance_divisor = 0;
    vertex_elems[2].vertex_buffer_index = 1;
-   vertex_elems[2].src_format = PIPE_FORMAT_R32_FLOAT;
+   vertex_elems[2].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
-   /* Chroma Cr texcoord element */
-   vertex_elems[3].src_offset = sizeof(struct vertex2f) + sizeof(float);
+   /* First ref surface top field texcoord element */
+   vertex_elems[3].src_offset = 0;
    vertex_elems[3].instance_divisor = 0;
-   vertex_elems[3].vertex_buffer_index = 1;
-   vertex_elems[3].src_format = PIPE_FORMAT_R32_FLOAT;
+   vertex_elems[3].vertex_buffer_index = 2;
+   vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* Chroma Cb texcoord element */
-   vertex_elems[4].src_offset = sizeof(struct vertex2f) + sizeof(float) * 2;
+   /* First ref surface bottom field texcoord element */
+   vertex_elems[4].src_offset = sizeof(struct vertex2f);
    vertex_elems[4].instance_divisor = 0;
-   vertex_elems[4].vertex_buffer_index = 1;
-   vertex_elems[4].src_format = PIPE_FORMAT_R32_FLOAT;
+   vertex_elems[4].vertex_buffer_index = 2;
+   vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* First ref surface top field texcoord element */
+   /* Second ref surface top field texcoord element */
    vertex_elems[5].src_offset = 0;
    vertex_elems[5].instance_divisor = 0;
-   vertex_elems[5].vertex_buffer_index = 2;
+   vertex_elems[5].vertex_buffer_index = 3;
    vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* First ref surface bottom field texcoord element */
+   /* Second ref surface bottom field texcoord element */
    vertex_elems[6].src_offset = sizeof(struct vertex2f);
    vertex_elems[6].instance_divisor = 0;
-   vertex_elems[6].vertex_buffer_index = 2;
+   vertex_elems[6].vertex_buffer_index = 3;
    vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* Second ref surface top field texcoord element */
-   vertex_elems[7].src_offset = 0;
-   vertex_elems[7].instance_divisor = 0;
-   vertex_elems[7].vertex_buffer_index = 3;
-   vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface bottom field texcoord element */
-   vertex_elems[8].src_offset = sizeof(struct vertex2f);
-   vertex_elems[8].instance_divisor = 0;
-   vertex_elems[8].vertex_buffer_index = 3;
-   vertex_elems[8].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 5, vertex_elems);
-   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
-   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 9, vertex_elems);
+   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 3, vertex_elems);
+   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 5, vertex_elems);
+   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
 
    r->vs_const_buf = pipe_buffer_create
    (
-- 
cgit v1.2.3


From e406936b9ec46f1b8a41d02edd15f384bb739e32 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 11 Nov 2010 12:49:47 +0100
Subject: [g3dvl] remove empty block handling for now

Maybe this isn't going in the right direction,
but it makes handling the code easier for now.
---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 58 ++----------------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h | 10 ----
 src/gallium/drivers/nv40/nv40_video_context.c    |  1 -
 src/gallium/drivers/nvfx/nvfx_video_context.c    |  1 -
 src/gallium/drivers/r300/r300_video_context.c    |  1 -
 src/gallium/drivers/r600/r600_video_context.c    |  1 -
 src/gallium/drivers/softpipe/sp_video_context.c  |  8 +---
 src/gallium/drivers/softpipe/sp_video_context.h  |  1 -
 8 files changed, 5 insertions(+), 76 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index a6fe0661200..4616e483f74 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -42,8 +42,6 @@
 #define MACROBLOCK_HEIGHT 16
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
-#define ZERO_BLOCK_NIL -1.0f
-#define ZERO_BLOCK_IS_NIL(zb) ((zb).x < 0.0f)
 #define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
 
 struct vertex_shader_consts
@@ -512,8 +510,7 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    /* Luma filter */
    filters[0] = PIPE_TEX_FILTER_NEAREST;
    /* Chroma filters */
-   if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 ||
-       r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
+   if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || true) { //TODO
       filters[1] = PIPE_TEX_FILTER_NEAREST;
       filters[2] = PIPE_TEX_FILTER_NEAREST;
    }
@@ -1199,10 +1196,6 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
 
-   if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE)
-      for (i = 0; i < 3; ++i)
-         r->zero_block[i].x = ZERO_BLOCK_NIL;
-
    r->num_macroblocks = 0;
 }
 
@@ -1230,17 +1223,6 @@ grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
       memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
 }
 
-static void
-fill_frame_zero_block(short *dst, unsigned dst_pitch)
-{
-   //unsigned y;
-   //
-   //assert(dst);
-
-   //for (y = 0; y < BLOCK_HEIGHT; ++y)
-   //   memset(dst + y * dst_pitch, 0, BLOCK_WIDTH * 2);
-}
-
 static void
 fill_field_zero_block(short *dst, unsigned dst_pitch)
 {
@@ -1284,23 +1266,8 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
 
             ++sb;
          }
-         else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
-            if(dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
-
-               if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
-                   ZERO_BLOCK_IS_NIL(r->zero_block[0])) {
-
-                  fill_frame_zero_block(texels + y * tex_pitch * BLOCK_WIDTH + x * BLOCK_WIDTH, tex_pitch);
-                  if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
-                     r->zero_block[0].x = (mbx + x * 0.5f);
-                     r->zero_block[0].y = (mby + y * 0.5f);
-                  }
-               }
-            }
-            else {
-
-               fill_field_zero_block(texels + y * tex_pitch + x * BLOCK_WIDTH, tex_pitch);
-            }
+         else if(dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
+            fill_field_zero_block(texels + y * tex_pitch + x * BLOCK_WIDTH, tex_pitch);
          }
       }
    }
@@ -1319,16 +1286,6 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
          grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
          ++sb;
       }
-      else if (r->eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE) {
-         if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL ||
-             ZERO_BLOCK_IS_NIL(r->zero_block[tb + 1])) {
-            fill_frame_zero_block(texels, tex_pitch);
-            if (r->eb_handling == VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE) {
-               r->zero_block[tb + 1].x = mbx;
-               r->zero_block[tb + 1].y = mby;
-            }
-         }
-      }
    }
 }
 
@@ -1371,18 +1328,12 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            unsigned picture_height,
                            enum pipe_video_chroma_format chroma_format,
                            enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
-                           enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
                            bool pot_buffers)
 {
-   unsigned i;
-
    assert(renderer);
    assert(pipe);
    /* TODO: Implement other policies */
    assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
-   /* TODO: Implement this */
-   /* XXX: XFER_ALL sampling issue at block edges when using bilinear filtering */
-   assert(eb_handling != VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE);
    /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
    assert(pot_buffers);
 
@@ -1393,7 +1344,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->picture_height = picture_height;
    renderer->chroma_format = chroma_format;
    renderer->bufmode = bufmode;
-   renderer->eb_handling = eb_handling;
    renderer->pot_buffers = pot_buffers;
 
    renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
@@ -1420,8 +1370,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->surface = NULL;
    renderer->past = NULL;
    renderer->future = NULL;
-   for (i = 0; i < 3; ++i)
-      renderer->zero_block[i].x = ZERO_BLOCK_NIL;
    renderer->num_macroblocks = 0;
 
    xfer_buffers_map(renderer);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 55441591988..50c38f9ff10 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -44,13 +44,6 @@ enum VL_MPEG12_MC_RENDERER_BUFFER_MODE
    VL_MPEG12_MC_RENDERER_BUFFER_PICTURE /* Larger batches, more memory */
 };
 
-enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK
-{
-   VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ALL, /* Waste of memory bandwidth */
-   VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE, /* Can only do point-filtering when interpolating subsampled chroma channels */
-   VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_NONE /* Needs conditional texel fetch! */
-};
-
 struct vl_mpeg12_mc_renderer
 {
    struct pipe_context *pipe;
@@ -58,7 +51,6 @@ struct vl_mpeg12_mc_renderer
    unsigned picture_height;
    enum pipe_video_chroma_format chroma_format;
    enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode;
-   enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling;
    bool pot_buffers;
    unsigned macroblocks_per_batch;
 
@@ -104,7 +96,6 @@ struct vl_mpeg12_mc_renderer
    struct pipe_mpeg12_macroblock *macroblock_buf;
    struct pipe_transfer *tex_transfer[3];
    short *texels[3];
-   struct vertex2f zero_block[3];
 
    struct keymap *texview_map;
 };
@@ -115,7 +106,6 @@ bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                                 unsigned picture_height,
                                 enum pipe_video_chroma_format chroma_format,
                                 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
-                                enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
                                 bool pot_buffers);
 
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
diff --git a/src/gallium/drivers/nv40/nv40_video_context.c b/src/gallium/drivers/nv40/nv40_video_context.c
index 15a26ea3b3b..e6e57ee787c 100644
--- a/src/gallium/drivers/nv40/nv40_video_context.c
+++ b/src/gallium/drivers/nv40/nv40_video_context.c
@@ -43,6 +43,5 @@ nv40_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
 
    return sp_video_create_ex(pipe, profile, chroma_format, width, height,
                              VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
-                             VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
                              true);
 }
diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.c b/src/gallium/drivers/nvfx/nvfx_video_context.c
index 9212ae57fc0..58e1c0baa27 100644
--- a/src/gallium/drivers/nvfx/nvfx_video_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_video_context.c
@@ -43,7 +43,6 @@ nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
 
    return sp_video_create_ex(pipe, profile, chroma_format, width, height,
                              VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
-                             VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
                              true,
                              PIPE_FORMAT_VUYX);
 }
diff --git a/src/gallium/drivers/r300/r300_video_context.c b/src/gallium/drivers/r300/r300_video_context.c
index 622f1b8820b..9fe6d6fcf25 100644
--- a/src/gallium/drivers/r300/r300_video_context.c
+++ b/src/gallium/drivers/r300/r300_video_context.c
@@ -267,7 +267,6 @@ r300_mpeg12_context_create(struct pipe_screen *screen,
     if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
                                    width, height, chroma_format,
                                    VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
-                                   VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
                                    true))
     {
         ctx->pipe->destroy(ctx->pipe);
diff --git a/src/gallium/drivers/r600/r600_video_context.c b/src/gallium/drivers/r600/r600_video_context.c
index 2bbf622052b..b3885db0f55 100644
--- a/src/gallium/drivers/r600/r600_video_context.c
+++ b/src/gallium/drivers/r600/r600_video_context.c
@@ -16,7 +16,6 @@ r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
 
    return sp_video_create_ex(pipe, profile, chroma_format, width, height,
                              VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
-                             VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
                              true,
                              PIPE_FORMAT_VUYX);
 }
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index ff217d66c85..3edab823677 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -423,7 +423,6 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
                  enum pipe_video_chroma_format chroma_format,
                  unsigned width, unsigned height,
                  enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
-                 enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
                  bool pot_buffers,
                  enum pipe_format decode_format)
 {
@@ -466,7 +465,7 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
 
    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
                                    width, height, chroma_format,
-                                   bufmode, eb_handling, pot_buffers)) {
+                                   bufmode, pot_buffers)) {
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
@@ -505,12 +504,10 @@ sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
       return NULL;
 
    /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture with softpipe */
-   /* TODO: Use XFER_NONE when implemented */
    return sp_video_create_ex(pipe, profile,
                              chroma_format,
                              width, height,
                              VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
-                             VL_MPEG12_MC_RENDERER_EMPTY_BLOCK_XFER_ONE,
                              true,
                              PIPE_FORMAT_XYUV);
 }
@@ -520,7 +517,6 @@ sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
                    enum pipe_video_chroma_format chroma_format,
                    unsigned width, unsigned height,
                    enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
-                   enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
                    bool pot_buffers,
                    enum pipe_format decode_format)
 {
@@ -532,7 +528,7 @@ sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
          return sp_mpeg12_create(pipe, profile,
                                  chroma_format,
                                  width, height,
-                                 bufmode, eb_handling,
+                                 bufmode,
                                  pot_buffers,
                                  decode_format);
       default:
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
index 0fe48d7a872..dbf1bc1d8dd 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.h
+++ b/src/gallium/drivers/softpipe/sp_video_context.h
@@ -62,7 +62,6 @@ sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
                    enum pipe_video_chroma_format chroma_format,
                    unsigned width, unsigned height,
                    enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
-                   enum VL_MPEG12_MC_RENDERER_EMPTY_BLOCK eb_handling,
                    bool pot_buffers,
                    enum pipe_format decode_format);
 
-- 
cgit v1.2.3


From 99b57bc20e839043ec6160502814085a0976fccc Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 11 Nov 2010 15:01:27 +0100
Subject: [g3dvl] move stuff from flush into own functions

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 96 +++++++++++++++---------
 1 file changed, 59 insertions(+), 37 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 4616e483f74..0ac258c9667 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -751,6 +751,29 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    return true;
 }
 
+static bool
+init_const_buffers(struct vl_mpeg12_mc_renderer *r)
+{
+   struct pipe_transfer *buf_transfer;
+   struct vertex2f *rect;
+   unsigned i;
+
+   rect = pipe_buffer_map
+   (
+      r->pipe,
+      r->vertex_bufs.individual.rect.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   for ( i = 0; i < r->macroblocks_per_batch; ++i)
+     memcpy(rect + i * 24, &const_rectangle, sizeof(const_rectangle));
+
+   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.rect.buffer, buf_transfer);
+   
+   return true;
+}
+
 static void
 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
 {
@@ -1044,49 +1067,12 @@ flush(struct vl_mpeg12_mc_renderer *r)
 {
    unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
    unsigned vb_start = 0;
-   struct vertex_shader_consts *vs_consts;
-   struct pipe_transfer *buf_transfer;
-   struct vertex2f *rect;
-   unsigned i;
 
    assert(r);
    assert(r->num_macroblocks == r->macroblocks_per_batch);
 
-   rect = pipe_buffer_map
-   (
-      r->pipe,
-      r->vertex_bufs.individual.rect.buffer,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer
-   );
-
-   for ( i = 0; i < r->macroblocks_per_batch; ++i)
-     memcpy(rect + i * 24, &const_rectangle, sizeof(const_rectangle));
-   
-   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.rect.buffer, buf_transfer);
-
    gen_macroblock_stream(r, num_macroblocks);
 
-   r->fb_state.cbufs[0] = r->surface;
-
-   r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
-   r->pipe->set_viewport_state(r->pipe, &r->viewport);
-
-   vs_consts = pipe_buffer_map
-   (
-      r->pipe, r->vs_const_buf,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer
-   );
-
-   vs_consts->norm.x = 1.0f / r->surface->width;
-   vs_consts->norm.y = 1.0f / r->surface->height;
-
-   pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
-
-   r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
-                                r->vs_const_buf);
-
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
@@ -1199,6 +1185,33 @@ flush(struct vl_mpeg12_mc_renderer *r)
    r->num_macroblocks = 0;
 }
 
+static void
+update_render_target(struct vl_mpeg12_mc_renderer *r)
+{
+   struct pipe_transfer *buf_transfer;
+   struct vertex_shader_consts *vs_consts;
+
+   vs_consts = pipe_buffer_map
+   (
+      r->pipe, r->vs_const_buf,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   vs_consts->norm.x = 1.0f / r->surface->width;
+   vs_consts->norm.y = 1.0f / r->surface->height;
+
+   pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
+
+   r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
+                                r->vs_const_buf);
+
+   r->fb_state.cbufs[0] = r->surface;
+
+   r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
+   r->pipe->set_viewport_state(r->pipe, &r->viewport);
+}
+
 static void
 grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
 {
@@ -1367,6 +1380,14 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
       return false;
    }
 
+   if (!init_const_buffers(renderer)) {
+      util_delete_keymap(renderer->texview_map, renderer->pipe);
+      cleanup_pipe_state(renderer);
+      cleanup_shaders(renderer);
+      cleanup_buffers(renderer);
+      return false;
+   }
+
    renderer->surface = NULL;
    renderer->past = NULL;
    renderer->future = NULL;
@@ -1434,6 +1455,7 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       pipe_surface_reference(&renderer->past, past);
       pipe_surface_reference(&renderer->future, future);
       renderer->fence = fence;
+      update_render_target(renderer);
    }
 
    while (num_macroblocks) {
-- 
cgit v1.2.3


From 97e92ab63bbf4282787499b4cf55ccb50675bdbc Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 11 Nov 2010 22:08:00 +0100
Subject: [g3dvl] move xfer_buffers_map/unmap into flush

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 0ac258c9667..fda54ac3632 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1071,6 +1071,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    assert(r);
    assert(r->num_macroblocks == r->macroblocks_per_batch);
 
+   xfer_buffers_unmap(r);
    gen_macroblock_stream(r, num_macroblocks);
 
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
@@ -1183,6 +1184,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
 
    r->num_macroblocks = 0;
+   xfer_buffers_map(r);
 }
 
 static void
@@ -1436,7 +1438,6 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
    if (renderer->surface) {
       if (surface != renderer->surface) {
          if (renderer->num_macroblocks > 0) {
-            xfer_buffers_unmap(renderer);
             flush(renderer);
          }
 
@@ -1471,9 +1472,7 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       num_macroblocks -= num_to_submit;
 
       if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
-         xfer_buffers_unmap(renderer);
          flush(renderer);
-         xfer_buffers_map(renderer);
          /* Next time we get this surface it may have new ref frames */
          pipe_surface_reference(&renderer->surface, NULL);
          pipe_surface_reference(&renderer->past, NULL);
-- 
cgit v1.2.3


From d8192f18218a364bc497d6348d8ad934b92a1696 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 12 Nov 2010 00:17:56 +0100
Subject: [g3dvl] use constants for vertex shader outputs and move field
 calculation into own function

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 93 +++++++++++++++---------
 1 file changed, 58 insertions(+), 35 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index fda54ac3632..51eacf85718 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -63,6 +63,19 @@ struct vert_stream_0
    float cr_eb;
 };
 
+enum VS_OUTPUT
+{
+   VS_O_VPOS,
+   VS_O_LINE,
+   VS_O_TEX0,
+   VS_O_TEX1,
+   VS_O_TEX2,
+   VS_O_MV0,
+   VS_O_MV1,
+   VS_O_MV2,
+   VS_O_MV3
+};
+
 enum MACROBLOCK_TYPE
 {
    MACROBLOCK_TYPE_INTRA,
@@ -102,7 +115,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    struct ureg_dst t_vpos, scale;
    struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_line;
    unsigned i, j, count;
-   bool interlaced = mv_per_frame == 2;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
@@ -118,16 +130,17 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    vpos = ureg_DECL_vs_input(shader, 1);
    vtex = ureg_DECL_vs_input(shader, 2);
 
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, 0);
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
    for (i = 0; i < 3; ++i)
-      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 1 + i);
+      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0 + i);
    
    count=0;
    for (i = 0; i < ref_frames; ++i) {
       for (j = 0; j < 2; ++j) {        
         if(j < mv_per_frame) {
            vmv[count] = ureg_DECL_vs_input(shader, 3 + i * 2 + j);
-           o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
+           o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + count);
            count++;
         }
         /* workaround for r600g */
@@ -136,8 +149,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
       }
    }
 
-   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, 4 + count);
-
    /*
     * scale = norm * mbs;
     *
@@ -165,6 +176,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
+   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
+   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
+
    for (i = 0; i < 3; ++i) {
       ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
       ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_X + i));
@@ -176,11 +190,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
          ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(scale), vmv[i], ureg_src(t_vpos));
    }
 
-   if (interlaced) {
-      ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
-      ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
-   }
-
    ureg_release_temporary(shader, t_vpos);
    ureg_release_temporary(shader, scale);
 
@@ -189,6 +198,27 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
+static struct ureg_dst
+calc_field(struct ureg_program *shader)
+{
+   struct ureg_dst tmp;
+   struct ureg_src line;
+
+   tmp = ureg_DECL_temporary(shader);
+   line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
+
+   /*
+    * line going from 0 to 8 in steps of 0.5
+    *
+    * tmp = fraction(line)
+    * tmp = tmp >= 0.5 ? 1 : 0
+    */
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+
+   return tmp;
+}
+
 static struct ureg_dst
 fetch_ycbcr(struct ureg_program *shader)
 {
@@ -201,7 +231,7 @@ fetch_ycbcr(struct ureg_program *shader)
    tmp = ureg_DECL_temporary(shader);
 
    for (i = 0; i < 3; ++i)  {
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 1, TGSI_INTERPOLATE_LINEAR);
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0 + i, TGSI_INTERPOLATE_LINEAR);
       sampler[i] = ureg_DECL_sampler(shader, i);
    }
 
@@ -260,7 +290,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    if (!shader)
       return NULL;
 
-   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 4, TGSI_INTERPOLATE_LINEAR);
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0, TGSI_INTERPOLATE_LINEAR);
    sampler = ureg_DECL_sampler(shader, 3);
 
    ref = ureg_DECL_temporary(shader);
@@ -286,9 +316,9 @@ static void *
 create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[2], line;
+   struct ureg_src tc[2];
    struct ureg_src sampler;
-   struct ureg_dst texel, ref, tmp;
+   struct ureg_dst texel, ref, field;
    struct ureg_dst fragment;
    unsigned i, label;
 
@@ -297,18 +327,16 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       return NULL;
 
    for (i = 0; i < 2; ++i)
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 4, TGSI_INTERPOLATE_LINEAR);
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
    sampler = ureg_DECL_sampler(shader, 3);
 
-   line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 6, TGSI_INTERPOLATE_LINEAR);
-
    ref = ureg_DECL_temporary(shader);
-   tmp = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
     * texel = fetch_ycbcr()
-    * if(line % 2)
+    * field = calc_field();
+    * if(field == 1)
     *    ref = tex(tc[1], sampler)
     * else
     *    ref = tex(tc[0], sampler)
@@ -316,9 +344,8 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     */
    texel = fetch_ycbcr(shader);
 
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
-   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
+   field = calc_field(shader);
+   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[1], sampler);
    ureg_ELSE(shader, &label);
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[0], sampler);
@@ -326,8 +353,8 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
 
-   ureg_release_temporary(shader, tmp);
    ureg_release_temporary(shader, texel);
+   ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, ref);
    ureg_END(shader);
 
@@ -349,7 +376,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       return NULL;
 
    for (i = 0; i < 2; ++i)  {
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 4, TGSI_INTERPOLATE_LINEAR);
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
       sampler[i] = ureg_DECL_sampler(shader, i + 3);
    }
 
@@ -382,9 +409,9 @@ static void *
 create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[4], line;
+   struct ureg_src tc[4];
    struct ureg_src sampler[2];
-   struct ureg_dst texel, ref[2], tmp;
+   struct ureg_dst texel, ref[2], field;
    struct ureg_dst fragment;
    unsigned i, label;
 
@@ -393,16 +420,13 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       return NULL;
 
    for (i = 0; i < 4; ++i)
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, i + 4, TGSI_INTERPOLATE_LINEAR);
+      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
    for (i = 0; i < 2; ++i)
       sampler[i] = ureg_DECL_sampler(shader, i + 3);
 
-   line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 8, TGSI_INTERPOLATE_LINEAR);
-
    texel = ureg_DECL_temporary(shader);
    ref[0] = ureg_DECL_temporary(shader);
    ref[1] = ureg_DECL_temporary(shader);
-   tmp = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
@@ -416,9 +440,8 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     */
    texel = fetch_ycbcr(shader);
 
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
-   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
+   field = calc_field(shader);
+   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
       ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
       ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[3], sampler[1]);
    ureg_ELSE(shader, &label);
@@ -430,8 +453,8 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
 
-   ureg_release_temporary(shader, tmp);
    ureg_release_temporary(shader, texel);
+   ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, ref[0]);
    ureg_release_temporary(shader, ref[1]);
    ureg_END(shader);
-- 
cgit v1.2.3


From 7af05c4fe6283bb69d0b157975589f035516bb2d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 12 Nov 2010 00:29:37 +0100
Subject: [g3dvl] move applying z-coord to fragment shader

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 51eacf85718..ba8c5e8fd05 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -69,7 +69,7 @@ enum VS_OUTPUT
    VS_O_LINE,
    VS_O_TEX0,
    VS_O_TEX1,
-   VS_O_TEX2,
+   VS_O_INFO,
    VS_O_MV0,
    VS_O_MV1,
    VS_O_MV2,
@@ -113,7 +113,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    struct ureg_src norm, mbs;
    struct ureg_src vrect, vpos, vtex, vmv[4];
    struct ureg_dst t_vpos, scale;
-   struct ureg_dst o_vpos, o_vtex[3], o_vmv[4], o_line;
+   struct ureg_dst o_vpos, o_vtex[2], o_info, o_vmv[4], o_line;
    unsigned i, j, count;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -132,8 +132,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
    o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
-   for (i = 0; i < 3; ++i)
-      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0 + i);
+   o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
+   o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
+   o_info = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO);
    
    count=0;
    for (i = 0; i < ref_frames; ++i) {
@@ -179,10 +180,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
    ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
 
-   for (i = 0; i < 3; ++i) {
+   for (i = 0; i < 2; ++i) {
       ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_scalar(vtex, TGSI_SWIZZLE_X + i));
    }
+   ureg_MOV(shader, o_info, vtex);
 
    if(count > 0) {
       ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
@@ -222,7 +223,7 @@ calc_field(struct ureg_program *shader)
 static struct ureg_dst
 fetch_ycbcr(struct ureg_program *shader)
 {
-   struct ureg_src tc[3];
+   struct ureg_src tc[2], info;
    struct ureg_src sampler[3];
    struct ureg_dst texel, tmp;
    unsigned i;
@@ -230,8 +231,10 @@ fetch_ycbcr(struct ureg_program *shader)
    texel = ureg_DECL_temporary(shader);
    tmp = ureg_DECL_temporary(shader);
 
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
+   info = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO, TGSI_INTERPOLATE_LINEAR);
    for (i = 0; i < 3; ++i)  {
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0 + i, TGSI_INTERPOLATE_LINEAR);
       sampler[i] = ureg_DECL_sampler(shader, i);
    }
 
@@ -241,8 +244,11 @@ fetch_ycbcr(struct ureg_program *shader)
     * texel.cr = tex(tc[2], sampler[2])
     */
    for (i = 0; i < 3; ++i) {
+      ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[0]);
+      ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(info, TGSI_SWIZZLE_X + i));
+
       /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, tc[i], sampler[i]);
+      ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(tmp), sampler[i]);
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
    }
 
-- 
cgit v1.2.3


From fa4d27451655cb008fe75bf00a3176c9b76245e4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 12 Nov 2010 01:04:22 +0100
Subject: [g3dvl] make room for second z-coord and interlaced flag

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 135 ++++++++++++++---------
 1 file changed, 85 insertions(+), 50 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index ba8c5e8fd05..4a6dda729d1 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -58,9 +58,29 @@ struct fragment_shader_consts
 struct vert_stream_0
 {
    struct vertex2f pos;
-   float luma_eb;
-   float cb_eb;
-   float cr_eb;
+
+   struct {
+      float luma_eb;
+      float cb_eb;
+      float cr_eb;
+   } field[2];
+
+   float interlaced;
+};
+
+enum VS_INPUT
+{
+   VS_I_RECT,
+   VS_I_VPOS,
+   VS_I_FIELD0,
+   VS_I_FIELD1,
+   VS_I_INTERLACED,
+   VS_I_MV0,
+   VS_I_MV1,
+   VS_I_MV2,
+   VS_I_MV3,
+
+   NUM_VS_INPUTS
 };
 
 enum VS_OUTPUT
@@ -111,7 +131,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 {
    struct ureg_program *shader;
    struct ureg_src norm, mbs;
-   struct ureg_src vrect, vpos, vtex, vmv[4];
+   struct ureg_src vrect, vpos, field[2], interlaced, vmv[4];
    struct ureg_dst t_vpos, scale;
    struct ureg_dst o_vpos, o_vtex[2], o_info, o_vmv[4], o_line;
    unsigned i, j, count;
@@ -126,9 +146,11 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    t_vpos = ureg_DECL_temporary(shader);
    scale = ureg_DECL_temporary(shader);
 
-   vrect = ureg_DECL_vs_input(shader, 0);
-   vpos = ureg_DECL_vs_input(shader, 1);
-   vtex = ureg_DECL_vs_input(shader, 2);
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+   field[0] = ureg_DECL_vs_input(shader, VS_I_FIELD0);
+   field[1] = ureg_DECL_vs_input(shader, VS_I_FIELD1);
+   interlaced = ureg_DECL_vs_input(shader, VS_I_INTERLACED);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
    o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
@@ -140,13 +162,13 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    for (i = 0; i < ref_frames; ++i) {
       for (j = 0; j < 2; ++j) {        
         if(j < mv_per_frame) {
-           vmv[count] = ureg_DECL_vs_input(shader, 3 + i * 2 + j);
+           vmv[count] = ureg_DECL_vs_input(shader, VS_I_MV0 + i * 2 + j);
            o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + count);
            count++;
         }
         /* workaround for r600g */
         else if(ref_frames == 2)
-           ureg_DECL_vs_input(shader, 3 + i * 2 + j);
+           ureg_DECL_vs_input(shader, VS_I_MV0 + i * 2 + j);
       }
    }
 
@@ -183,7 +205,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    for (i = 0; i < 2; ++i) {
       ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    }
-   ureg_MOV(shader, o_info, vtex);
+   ureg_MOV(shader, o_info, field[0]);
 
    if(count > 0) {
       ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
@@ -630,7 +652,7 @@ static bool
 init_buffers(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_resource template;
-   struct pipe_vertex_element vertex_elems[7];
+   struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
    struct pipe_sampler_view sampler_view;
 
    const unsigned mbw =
@@ -724,51 +746,64 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
 
    memset(&vertex_elems, 0, sizeof(vertex_elems));
 
+
    /* Rectangle element */
-   vertex_elems[0].src_offset = 0;
-   vertex_elems[0].instance_divisor = 0;
-   vertex_elems[0].vertex_buffer_index = 0;
-   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[VS_I_RECT].src_offset = 0;
+   vertex_elems[VS_I_RECT].instance_divisor = 0;
+   vertex_elems[VS_I_RECT].vertex_buffer_index = 0;
+   vertex_elems[VS_I_RECT].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Position element */
-   vertex_elems[1].src_offset = 0;
-   vertex_elems[1].instance_divisor = 0;
-   vertex_elems[1].vertex_buffer_index = 1;
-   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* y, cr, cb z-coordinate element */
-   vertex_elems[2].src_offset = sizeof(struct vertex2f);
-   vertex_elems[2].instance_divisor = 0;
-   vertex_elems[2].vertex_buffer_index = 1;
-   vertex_elems[2].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   vertex_elems[VS_I_VPOS].src_offset = 0;
+   vertex_elems[VS_I_VPOS].instance_divisor = 0;
+   vertex_elems[VS_I_VPOS].vertex_buffer_index = 1;
+   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* y, cr, cb z-coordinate element top field */
+   vertex_elems[VS_I_FIELD0].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_FIELD0].instance_divisor = 0;
+   vertex_elems[VS_I_FIELD0].vertex_buffer_index = 1;
+   vertex_elems[VS_I_FIELD0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+
+   /* y, cr, cb z-coordinate element bottom field */
+   vertex_elems[VS_I_FIELD1].src_offset = sizeof(struct vertex2f) + sizeof(float) * 3;
+   vertex_elems[VS_I_FIELD1].instance_divisor = 0;
+   vertex_elems[VS_I_FIELD1].vertex_buffer_index = 1;
+   vertex_elems[VS_I_FIELD1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+
+   /* temporary workaound */
+   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(struct vertex2f) + sizeof(float) * 6;
+   vertex_elems[VS_I_INTERLACED].instance_divisor = 0;
+   vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 1;
+   vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
 
    /* First ref surface top field texcoord element */
-   vertex_elems[3].src_offset = 0;
-   vertex_elems[3].instance_divisor = 0;
-   vertex_elems[3].vertex_buffer_index = 2;
-   vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[VS_I_MV0].src_offset = 0;
+   vertex_elems[VS_I_MV0].instance_divisor = 0;
+   vertex_elems[VS_I_MV0].vertex_buffer_index = 2;
+   vertex_elems[VS_I_MV0].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* First ref surface bottom field texcoord element */
-   vertex_elems[4].src_offset = sizeof(struct vertex2f);
-   vertex_elems[4].instance_divisor = 0;
-   vertex_elems[4].vertex_buffer_index = 2;
-   vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[VS_I_MV1].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_MV1].instance_divisor = 0;
+   vertex_elems[VS_I_MV1].vertex_buffer_index = 2;
+   vertex_elems[VS_I_MV1].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Second ref surface top field texcoord element */
-   vertex_elems[5].src_offset = 0;
-   vertex_elems[5].instance_divisor = 0;
-   vertex_elems[5].vertex_buffer_index = 3;
-   vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[VS_I_MV2].src_offset = 0;
+   vertex_elems[VS_I_MV2].instance_divisor = 0;
+   vertex_elems[VS_I_MV2].vertex_buffer_index = 3;
+   vertex_elems[VS_I_MV2].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Second ref surface bottom field texcoord element */
-   vertex_elems[6].src_offset = sizeof(struct vertex2f);
-   vertex_elems[6].instance_divisor = 0;
-   vertex_elems[6].vertex_buffer_index = 3;
-   vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[VS_I_MV3].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_MV3].instance_divisor = 0;
+   vertex_elems[VS_I_MV3].vertex_buffer_index = 3;
+   vertex_elems[VS_I_MV3].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 3, vertex_elems);
-   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 5, vertex_elems);
-   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
+   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 5, vertex_elems);
+   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
+   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 9, vertex_elems);
 
    r->vs_const_buf = pipe_buffer_create
    (
@@ -861,24 +896,24 @@ gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
    v.pos.y = mb->mby;
 
    if (cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
-      v.luma_eb = 0.0f;
+      v.field[0].luma_eb = 0.0f;
    }
    else {
-      v.luma_eb = -1.0f;
+      v.field[0].luma_eb = -1.0f;
    }
 
    if (cbp & cb_mask) {
-      v.cb_eb = 0.0f;
+      v.field[0].cb_eb = 0.0f;
    }
    else {
-      v.cb_eb = -1.0f;
+      v.field[0].cb_eb = -1.0f;
    }
 
    if (cbp & cr_mask) {
-      v.cr_eb = 0.0f;
+      v.field[0].cr_eb = 0.0f;
    }
    else {
-      v.cr_eb = -1.0f;
+      v.field[0].cr_eb = -1.0f;
    }
 
    for ( i = 0; i < 6; ++i )
-- 
cgit v1.2.3


From 1eaf4806cd6a3c2cbfdf5b0f8377424dfe191902 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 12 Nov 2010 12:00:33 +0100
Subject: [g3dvl] and finally move field codec dct handling into shaders

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 176 +++++++++++++----------
 1 file changed, 98 insertions(+), 78 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 4a6dda729d1..f2706bcb66c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -89,7 +89,9 @@ enum VS_OUTPUT
    VS_O_LINE,
    VS_O_TEX0,
    VS_O_TEX1,
-   VS_O_INFO,
+   VS_O_TEX2,
+   VS_O_INFO0,
+   VS_O_INFO1,
    VS_O_MV0,
    VS_O_MV1,
    VS_O_MV2,
@@ -132,9 +134,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    struct ureg_program *shader;
    struct ureg_src norm, mbs;
    struct ureg_src vrect, vpos, field[2], interlaced, vmv[4];
-   struct ureg_dst t_vpos, scale;
-   struct ureg_dst o_vpos, o_vtex[2], o_info, o_vmv[4], o_line;
-   unsigned i, j, count;
+   struct ureg_dst t_vpos, scale, tmp;
+   struct ureg_dst o_vpos, o_vtex[3], o_info[2], o_vmv[4], o_line;
+   unsigned i, j, count, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
@@ -145,6 +147,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    t_vpos = ureg_DECL_temporary(shader);
    scale = ureg_DECL_temporary(shader);
+   tmp = ureg_DECL_temporary(shader);
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
@@ -156,7 +159,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
    o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
    o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
-   o_info = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO);
+   o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
+   o_info[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO0);
+   o_info[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO1);
    
    count=0;
    for (i = 0; i < ref_frames; ++i) {
@@ -202,10 +207,32 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
    ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
 
-   for (i = 0; i < 2; ++i) {
-      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   }
-   ureg_MOV(shader, o_info, field[0]);
+   ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+
+   ureg_IF(shader, interlaced, &label);
+
+      ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), vrect);
+      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
+      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, ureg_src(tmp));
+      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_src(scale));
+      ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(tmp));
+
+      ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), vrect);
+      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
+      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, ureg_src(tmp));
+      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_src(scale));
+      ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(tmp));
+
+   ureg_ELSE(shader, &label);
+
+      ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+      ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+
+   ureg_ENDIF(shader);
+
+   ureg_MOV(shader, o_info[1], field[1]);
+   ureg_MOV(shader, o_info[0], field[0]);
 
    if(count > 0) {
       ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
@@ -213,6 +240,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
          ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(scale), vmv[i], ureg_src(t_vpos));
    }
 
+   ureg_release_temporary(shader, tmp);
    ureg_release_temporary(shader, t_vpos);
    ureg_release_temporary(shader, scale);
 
@@ -243,19 +271,21 @@ calc_field(struct ureg_program *shader)
 }
 
 static struct ureg_dst
-fetch_ycbcr(struct ureg_program *shader)
+fetch_ycbcr(struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src tc[2], info;
+   struct ureg_src tc[2], info[2];
    struct ureg_src sampler[3];
    struct ureg_dst texel, tmp;
-   unsigned i;
+   unsigned i, label;
 
    texel = ureg_DECL_temporary(shader);
    tmp = ureg_DECL_temporary(shader);
 
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
-   info = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO, TGSI_INTERPOLATE_LINEAR);
+   tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
+   info[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO0, TGSI_INTERPOLATE_LINEAR);
+   info[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO1, TGSI_INTERPOLATE_LINEAR);
    for (i = 0; i < 3; ++i)  {
       sampler[i] = ureg_DECL_sampler(shader, i);
    }
@@ -266,8 +296,18 @@ fetch_ycbcr(struct ureg_program *shader)
     * texel.cr = tex(tc[2], sampler[2])
     */
    for (i = 0; i < 3; ++i) {
-      ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[0]);
-      ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(info, TGSI_SWIZZLE_X + i));
+      if(i==0) {
+         ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
+            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[1]);
+            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(info[1], TGSI_SWIZZLE_X + i));
+         ureg_ELSE(shader, &label);
+            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[0]);
+            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(info[0], TGSI_SWIZZLE_X + i));
+         ureg_ENDIF(shader);
+      } else {
+         ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[2]);
+         ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(info[0], TGSI_SWIZZLE_X + i));
+      }
 
       /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
       ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(tmp), sampler[i]);
@@ -283,7 +323,7 @@ static void *
 create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_dst texel;
+   struct ureg_dst field, texel;
    struct ureg_dst fragment;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
@@ -296,9 +336,11 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * texel = fetch_ycbcr()
     * fragment = texel * scale
     */
-   texel = fetch_ycbcr(shader);
+   field = calc_field(shader);
+   texel = fetch_ycbcr(shader, field);
    ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
 
+   ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
    ureg_END(shader);
 
@@ -311,7 +353,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_program *shader;
    struct ureg_src tc;
    struct ureg_src sampler;
-   struct ureg_dst texel, ref;
+   struct ureg_dst field, texel, ref;
    struct ureg_dst fragment;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
@@ -329,10 +371,12 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * ref = tex(tc, sampler)
     * fragment = texel * scale + ref
     */
-   texel = fetch_ycbcr(shader);
+   field = calc_field(shader);
+   texel = fetch_ycbcr(shader, field);
    ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc, sampler);
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
 
+   ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, ref);
    ureg_END(shader);
@@ -370,9 +414,9 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     *    ref = tex(tc[0], sampler)
     * fragment = texel * scale + ref
     */
-   texel = fetch_ycbcr(shader);
-
    field = calc_field(shader);
+   texel = fetch_ycbcr(shader, field);
+
    ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[1], sampler);
    ureg_ELSE(shader, &label);
@@ -381,8 +425,8 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
 
-   ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, field);
+   ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, ref);
    ureg_END(shader);
 
@@ -395,7 +439,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_program *shader;
    struct ureg_src tc[2];
    struct ureg_src sampler[2];
-   struct ureg_dst texel, ref[2];
+   struct ureg_dst field, texel, ref[2];
    struct ureg_dst fragment;
    unsigned i;
 
@@ -418,13 +462,15 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * ref[0] = lerp(ref[0], ref[1], 0.5)
     * fragment = texel * scale + ref[0]
     */
-   texel = fetch_ycbcr(shader);
+   field = calc_field(shader);
+   texel = fetch_ycbcr(shader, field);
    ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
    ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
    ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
 
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
 
+   ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, ref[0]);
    ureg_release_temporary(shader, ref[1]);
@@ -466,9 +512,9 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * ref[0] = lerp(ref[0], ref[1], 0.5)
     * fragment = texel * scale + ref[0]
     */
-   texel = fetch_ycbcr(shader);
-
    field = calc_field(shader);
+   texel = fetch_ycbcr(shader, field);
+
    ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
       ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
       ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[3], sampler[1]);
@@ -481,8 +527,8 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
 
-   ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, field);
+   ureg_release_temporary(shader, texel);
    ureg_release_temporary(shader, ref[0]);
    ureg_release_temporary(shader, ref[1]);
    ureg_END(shader);
@@ -746,7 +792,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
 
    memset(&vertex_elems, 0, sizeof(vertex_elems));
 
-
    /* Rectangle element */
    vertex_elems[VS_I_RECT].src_offset = 0;
    vertex_elems[VS_I_RECT].instance_divisor = 0;
@@ -884,7 +929,7 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
 
 void
 gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
-                unsigned luma_mask, unsigned cb_mask, unsigned cr_mask)
+                unsigned luma_mask_0, unsigned luma_mask_1, unsigned cb_mask, unsigned cr_mask)
 {
    unsigned cbp = mb->cbp;
    unsigned i;
@@ -895,26 +940,28 @@ gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
    v.pos.x = mb->mbx;
    v.pos.y = mb->mby;
 
-   if (cbp & luma_mask || mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
-      v.field[0].luma_eb = 0.0f;
-   }
-   else {
-      v.field[0].luma_eb = -1.0f;
-   }
+   v.field[0].luma_eb = (cbp & luma_mask_0) ? 0.0f : -1.0f;
+   v.field[1].luma_eb = (cbp & luma_mask_1) ? 0.0f : -1.0f;
 
    if (cbp & cb_mask) {
       v.field[0].cb_eb = 0.0f;
+      v.field[1].cb_eb = 0.0f;
    }
    else {
       v.field[0].cb_eb = -1.0f;
+      v.field[1].cb_eb = -1.0f;
    }
 
    if (cbp & cr_mask) {
       v.field[0].cr_eb = 0.0f;
+      v.field[1].cr_eb = 0.0f;
    }
    else {
       v.field[0].cr_eb = -1.0f;
+      v.field[1].cr_eb = -1.0f;
    }
+   
+   v.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
 
    for ( i = 0; i < 6; ++i )
      memcpy(vb + i, &v, sizeof(v));
@@ -1033,10 +1080,17 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
       {
          struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
 
-         gen_block_verts(vb, mb, 32, 2, 1);
-         gen_block_verts(vb + 6, mb, 16, 2, 1);
-         gen_block_verts(vb + 12, mb, 8, 2, 1);
-         gen_block_verts(vb + 18, mb, 4, 2, 1);
+         if(mb->dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
+            gen_block_verts(vb     , mb, 32, 32, 2, 1);
+            gen_block_verts(vb + 6 , mb, 16, 16, 2, 1);
+            gen_block_verts(vb + 12, mb, 8,   8, 2, 1);
+            gen_block_verts(vb + 18, mb, 4,   4, 2, 1);
+         } else {
+            gen_block_verts(vb     , mb, 32, 8, 2, 1);
+            gen_block_verts(vb + 6 , mb, 16, 4, 2, 1);
+            gen_block_verts(vb + 12, mb, 32, 8, 2, 1);
+            gen_block_verts(vb + 18, mb, 16, 4, 2, 1);
+         }
 
          break;
       }
@@ -1279,7 +1333,7 @@ update_render_target(struct vl_mpeg12_mc_renderer *r)
 }
 
 static void
-grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
+grab_coded_block(short *src, short *dst, unsigned dst_pitch)
 {
    unsigned y;
 
@@ -1290,29 +1344,6 @@ grab_frame_coded_block(short *src, short *dst, unsigned dst_pitch)
       memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
 }
 
-static void
-grab_field_coded_block(short *src, short *dst, unsigned dst_pitch)
-{
-   unsigned y;
-
-   assert(src);
-   assert(dst);
-
-   for (y = 0; y < BLOCK_HEIGHT; ++y)
-      memcpy(dst + y * dst_pitch * 2, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
-}
-
-static void
-fill_field_zero_block(short *dst, unsigned dst_pitch)
-{
-   unsigned y;
-
-   assert(dst);
-
-   for (y = 0; y < BLOCK_HEIGHT; ++y)
-      memset(dst + y * dst_pitch * 2, 0, BLOCK_WIDTH * 2);
-}
-
 static void
 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
             enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
@@ -1332,22 +1363,11 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
          if ((cbp >> (5 - tb)) & 1) {
-            if (dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
-               grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
-                                      texels + y * tex_pitch * BLOCK_HEIGHT +
-                                      x * BLOCK_WIDTH, tex_pitch);
-            }
-            else {
-               grab_field_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
-                                      texels + y * tex_pitch + x * BLOCK_WIDTH,
-                                      tex_pitch);
-            }
-
+            grab_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
+                             texels + y * tex_pitch * BLOCK_HEIGHT +
+                             x * BLOCK_WIDTH, tex_pitch);
             ++sb;
          }
-         else if(dct_type == PIPE_MPEG12_DCT_TYPE_FIELD) {
-            fill_field_zero_block(texels + y * tex_pitch + x * BLOCK_WIDTH, tex_pitch);
-         }
       }
    }
 
@@ -1362,7 +1382,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
       texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
 
       if ((cbp >> (1 - tb)) & 1) {
-         grab_frame_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
+         grab_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
          ++sb;
       }
    }
-- 
cgit v1.2.3


From 33311ffed5f54fbc014fc35a204ec9ef667fcc47 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 12 Nov 2010 23:49:33 +0100
Subject: [g3dvl] cleanup

Add missing comments, simplify shaders, etc.
---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 131 ++++++++++++-----------
 1 file changed, 68 insertions(+), 63 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index f2706bcb66c..9cbc015c48c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -72,8 +72,8 @@ enum VS_INPUT
 {
    VS_I_RECT,
    VS_I_VPOS,
-   VS_I_FIELD0,
-   VS_I_FIELD1,
+   VS_I_EB0,
+   VS_I_EB1,
    VS_I_INTERLACED,
    VS_I_MV0,
    VS_I_MV1,
@@ -90,8 +90,8 @@ enum VS_OUTPUT
    VS_O_TEX0,
    VS_O_TEX1,
    VS_O_TEX2,
-   VS_O_INFO0,
-   VS_O_INFO1,
+   VS_O_EB0,
+   VS_O_EB1,
    VS_O_MV0,
    VS_O_MV1,
    VS_O_MV2,
@@ -133,9 +133,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 {
    struct ureg_program *shader;
    struct ureg_src norm, mbs;
-   struct ureg_src vrect, vpos, field[2], interlaced, vmv[4];
-   struct ureg_dst t_vpos, scale, tmp;
-   struct ureg_dst o_vpos, o_vtex[3], o_info[2], o_vmv[4], o_line;
+   struct ureg_src vrect, vpos, eb[2], interlaced, vmv[4];
+   struct ureg_dst scale, t_vpos, t_vtex;
+   struct ureg_dst o_vpos, o_vtex[3], o_eb[2], o_vmv[4], o_line;
    unsigned i, j, count, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -145,14 +145,14 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    norm = ureg_DECL_constant(shader, 0);
    mbs = ureg_imm2f(shader, MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT);
 
-   t_vpos = ureg_DECL_temporary(shader);
    scale = ureg_DECL_temporary(shader);
-   tmp = ureg_DECL_temporary(shader);
+   t_vpos = ureg_DECL_temporary(shader);
+   t_vtex = ureg_DECL_temporary(shader);
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
-   field[0] = ureg_DECL_vs_input(shader, VS_I_FIELD0);
-   field[1] = ureg_DECL_vs_input(shader, VS_I_FIELD1);
+   eb[0] = ureg_DECL_vs_input(shader, VS_I_EB0);
+   eb[1] = ureg_DECL_vs_input(shader, VS_I_EB1);
    interlaced = ureg_DECL_vs_input(shader, VS_I_INTERLACED);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
@@ -160,8 +160,8 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
    o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
    o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
-   o_info[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO0);
-   o_info[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO1);
+   o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB0);
+   o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB1);
    
    count=0;
    for (i = 0; i < ref_frames; ++i) {
@@ -184,18 +184,28 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
     * o_vpos.xy = t_vpos
     * o_vpos.zw = vpos
     *
-    * o_vtex[0..2].xy = t_vpos
-    * o_vtex[0..2].z = vtex[0..2].[xyz]
+    * o_line.x = 1
+    * o_line.y = vpos.y * 8
+    *
+    * if(interlaced) {
+    *    t_vtex.x = vrect.x
+    *    t_vtex.y = vrect.y * 0.5
+    *    t_vtex += vpos
+    *
+    *    o_vtex[0].xy = t_vtex * scale
+    *
+    *    t_vtex.y += 0.5
+    *    o_vtex[1].xy = t_vtex * scale
+    * } else {
+    *    o_vtex[0..1].xy = t_vpos
+    * }
+    * o_vtex[2].xy = t_vpos
     *
     * if(count > 0) { // Apply motion vectors
     *    scale = norm * 0.5;
     *    o_vmv[0..count] = t_vpos + vmv[0..4] * scale
     * }
     *
-    * if(interlaced) {
-    *    o_line.x = 1
-    *    o_line.y = vpos.y * 8
-    * }
     */
    ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, mbs);
 
@@ -207,22 +217,14 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
    ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
 
-   ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-
    ureg_IF(shader, interlaced, &label);
 
-      ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), vrect);
-      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
-      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, ureg_src(tmp));
-      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_src(scale));
-      ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(tmp));
-
-      ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), vrect);
-      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
-      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
-      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, ureg_src(tmp));
-      ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_src(scale));
-      ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(tmp));
+      ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
+      ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
+      ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
+      ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), ureg_src(scale));
+      ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
+      ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), ureg_src(scale));
 
    ureg_ELSE(shader, &label);
 
@@ -230,9 +232,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
       ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
    ureg_ENDIF(shader);
+   ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
-   ureg_MOV(shader, o_info[1], field[1]);
-   ureg_MOV(shader, o_info[0], field[0]);
+   ureg_MOV(shader, o_eb[0], eb[0]);
+   ureg_MOV(shader, o_eb[1], eb[1]);
 
    if(count > 0) {
       ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
@@ -240,7 +243,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
          ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(scale), vmv[i], ureg_src(t_vpos));
    }
 
-   ureg_release_temporary(shader, tmp);
+   ureg_release_temporary(shader, t_vtex);
    ureg_release_temporary(shader, t_vpos);
    ureg_release_temporary(shader, scale);
 
@@ -271,9 +274,9 @@ calc_field(struct ureg_program *shader)
 }
 
 static struct ureg_dst
-fetch_ycbcr(struct ureg_program *shader, struct ureg_dst field)
+fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src tc[2], info[2];
+   struct ureg_src tc[3], eb[2];
    struct ureg_src sampler[3];
    struct ureg_dst texel, tmp;
    unsigned i, label;
@@ -284,29 +287,31 @@ fetch_ycbcr(struct ureg_program *shader, struct ureg_dst field)
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
    tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
-   info[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO0, TGSI_INTERPOLATE_LINEAR);
-   info[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO1, TGSI_INTERPOLATE_LINEAR);
+
+   eb[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB0, TGSI_INTERPOLATE_LINEAR);
+   eb[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB1, TGSI_INTERPOLATE_LINEAR);
+
    for (i = 0; i < 3; ++i)  {
       sampler[i] = ureg_DECL_sampler(shader, i);
    }
 
    /*
-    * texel.y  = tex(tc[0], sampler[0])
-    * texel.cb = tex(tc[1], sampler[1])
+    * texel.y  = tex(field ? tc[1] : tc[0], sampler[0])
+    * texel.cb = tex(tc[2], sampler[1])
     * texel.cr = tex(tc[2], sampler[2])
     */
    for (i = 0; i < 3; ++i) {
-      if(i==0) {
+      if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
          ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
             ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[1]);
-            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(info[1], TGSI_SWIZZLE_X + i));
+            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(eb[1], TGSI_SWIZZLE_X + i));
          ureg_ELSE(shader, &label);
             ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[0]);
-            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(info[0], TGSI_SWIZZLE_X + i));
+            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(eb[0], TGSI_SWIZZLE_X + i));
          ureg_ENDIF(shader);
       } else {
          ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[2]);
-         ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(info[0], TGSI_SWIZZLE_X + i));
+         ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(eb[0], TGSI_SWIZZLE_X + i));
       }
 
       /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
@@ -337,7 +342,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * fragment = texel * scale
     */
    field = calc_field(shader);
-   texel = fetch_ycbcr(shader, field);
+   texel = fetch_ycbcr(r, shader, field);
    ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
 
    ureg_release_temporary(shader, field);
@@ -372,7 +377,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * fragment = texel * scale + ref
     */
    field = calc_field(shader);
-   texel = fetch_ycbcr(shader, field);
+   texel = fetch_ycbcr(r, shader, field);
    ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc, sampler);
    ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
 
@@ -415,7 +420,7 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * fragment = texel * scale + ref
     */
    field = calc_field(shader);
-   texel = fetch_ycbcr(shader, field);
+   texel = fetch_ycbcr(r, shader, field);
 
    ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[1], sampler);
@@ -463,7 +468,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
     * fragment = texel * scale + ref[0]
     */
    field = calc_field(shader);
-   texel = fetch_ycbcr(shader, field);
+   texel = fetch_ycbcr(r, shader, field);
    ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
    ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
    ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
@@ -505,15 +510,15 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    /*
     * texel = fetch_ycbcr()
-    * if(line % 2)
-    *    ref[0..1] = tex(tc[4|6], sampler[3..4])
+    * if(field == 1)
+    *    ref[0..1] = tex(tc[1|3], sampler[0..1])
     * else
-    *    ref[0..1] = tex(tc[3|5], sampler[3..4])
+    *    ref[0..1] = tex(tc[0|2], sampler[0..1])
     * ref[0] = lerp(ref[0], ref[1], 0.5)
     * fragment = texel * scale + ref[0]
     */
    field = calc_field(shader);
-   texel = fetch_ycbcr(shader, field);
+   texel = fetch_ycbcr(r, shader, field);
 
    ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
       ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
@@ -805,18 +810,18 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* y, cr, cb z-coordinate element top field */
-   vertex_elems[VS_I_FIELD0].src_offset = sizeof(struct vertex2f);
-   vertex_elems[VS_I_FIELD0].instance_divisor = 0;
-   vertex_elems[VS_I_FIELD0].vertex_buffer_index = 1;
-   vertex_elems[VS_I_FIELD0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   vertex_elems[VS_I_EB0].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_EB0].instance_divisor = 0;
+   vertex_elems[VS_I_EB0].vertex_buffer_index = 1;
+   vertex_elems[VS_I_EB0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
    /* y, cr, cb z-coordinate element bottom field */
-   vertex_elems[VS_I_FIELD1].src_offset = sizeof(struct vertex2f) + sizeof(float) * 3;
-   vertex_elems[VS_I_FIELD1].instance_divisor = 0;
-   vertex_elems[VS_I_FIELD1].vertex_buffer_index = 1;
-   vertex_elems[VS_I_FIELD1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   vertex_elems[VS_I_EB1].src_offset = sizeof(struct vertex2f) + sizeof(float) * 3;
+   vertex_elems[VS_I_EB1].instance_divisor = 0;
+   vertex_elems[VS_I_EB1].vertex_buffer_index = 1;
+   vertex_elems[VS_I_EB1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
-   /* temporary workaound */
+   /* progressive=1.0f interlaced=0.0f */
    vertex_elems[VS_I_INTERLACED].src_offset = sizeof(struct vertex2f) + sizeof(float) * 6;
    vertex_elems[VS_I_INTERLACED].instance_divisor = 0;
    vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 1;
-- 
cgit v1.2.3


From d128c091c74f3cd862bd12a381bc521cae42264c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 13 Nov 2010 14:06:47 +0100
Subject: [g3dvl] use quads instead of triangles

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 105 +++++++++++------------
 1 file changed, 48 insertions(+), 57 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 9cbc015c48c..ddfd0b751f9 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -111,21 +111,12 @@ enum MACROBLOCK_TYPE
    NUM_MACROBLOCK_TYPES
 };
 
-/* vertices for two triangles covering four block */
-static const struct vertex2f const_rectangle[4][2][3] = {
-   {
-      { {0.0f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.0f} },
-      { {0.5f, 0.0f}, {0.0f, 0.5f}, {0.5f, 0.5f} }
-   }, {
-      { {0.5f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.0f} },
-      { {1.0f, 0.0f}, {0.5f, 0.5f}, {1.0f, 0.5f} }
-   }, {
-      { {0.0f, 0.5f}, {0.0f, 1.0f}, {0.5f, 0.5f} },
-      { {0.5f, 0.5f}, {0.0f, 1.0f}, {0.5f, 1.0f} }
-   }, {
-      { {0.5f, 0.5f}, {0.5f, 1.0f}, {1.0f, 0.5f} },
-      { {1.0f, 0.5f}, {0.5f, 1.0f}, {1.0f, 1.0f} }
-   }
+/* vertices for four quads covering the blocks */
+static const struct vertex2f const_quads[4][4] = {
+   { {0.0f, 0.0f}, {0.5f, 0.0f}, {0.5f, 0.5f}, {0.0f, 0.5f} },
+   { {0.5f, 0.0f}, {1.0f, 0.0f}, {1.0f, 0.5f}, {0.5f, 0.5f} },
+   { {0.0f, 0.5f}, {0.5f, 0.5f}, {0.5f, 1.0f}, {0.0f, 1.0f} },
+   { {0.5f, 0.5f}, {1.0f, 0.5f}, {1.0f, 1.0f}, {0.5f, 1.0f} },
 };
 
 static void *
@@ -762,36 +753,36 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    }
 
    r->vertex_bufs.individual.rect.stride = sizeof(struct vertex2f);
-   r->vertex_bufs.individual.rect.max_index = 24 * r->macroblocks_per_batch - 1;
+   r->vertex_bufs.individual.rect.max_index = 16 * r->macroblocks_per_batch - 1;
    r->vertex_bufs.individual.rect.buffer_offset = 0;
    r->vertex_bufs.individual.rect.buffer = pipe_buffer_create
    (
       r->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex2f) * 24 * r->macroblocks_per_batch
+      sizeof(struct vertex2f) * 16 * r->macroblocks_per_batch
    );
 
    r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vert_stream_0);
-   r->vertex_bufs.individual.ycbcr.max_index = 24 * r->macroblocks_per_batch - 1;
+   r->vertex_bufs.individual.ycbcr.max_index = 16 * r->macroblocks_per_batch - 1;
    r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
    /* XXX: Create with usage DYNAMIC or STREAM */
    r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
    (
       r->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vert_stream_0) * 24 * r->macroblocks_per_batch
+      sizeof(struct vert_stream_0) * 16 * r->macroblocks_per_batch
    );
 
    for (i = 0; i < 2; ++i) {
       r->vertex_bufs.individual.ref[i].stride = sizeof(struct vertex2f) * 2;
-      r->vertex_bufs.individual.ref[i].max_index = 24 * r->macroblocks_per_batch - 1;
+      r->vertex_bufs.individual.ref[i].max_index = 16 * r->macroblocks_per_batch - 1;
       r->vertex_bufs.individual.ref[i].buffer_offset = 0;
       /* XXX: Create with usage DYNAMIC or STREAM */
       r->vertex_bufs.individual.ref[i].buffer = pipe_buffer_create
       (
          r->pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
-         sizeof(struct vertex2f) * 2 * 24 * r->macroblocks_per_batch
+         sizeof(struct vertex2f) * 2 * 16 * r->macroblocks_per_batch
       );
    }
 
@@ -881,7 +872,7 @@ init_const_buffers(struct vl_mpeg12_mc_renderer *r)
    );
 
    for ( i = 0; i < r->macroblocks_per_batch; ++i)
-     memcpy(rect + i * 24, &const_rectangle, sizeof(const_rectangle));
+     memcpy(rect + i * 16, &const_quads, sizeof(const_quads));
 
    pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.rect.buffer, buf_transfer);
    
@@ -968,7 +959,7 @@ gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
    
    v.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
 
-   for ( i = 0; i < 6; ++i )
+   for ( i = 0; i < 4; ++i )
      memcpy(vb + i, &v, sizeof(v));
 }
 
@@ -996,13 +987,13 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
 
          assert(ref_vb && ref_vb[1]);
 
-         vb = ref_vb[1] + pos * 2 * 24;
+         vb = ref_vb[1] + pos * 2 * 16;
 
          mo_vec[0].x = mb->pmv[0][1][0];
          mo_vec[0].y = mb->pmv[0][1][1];
 
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            for (i = 0; i < 24 * 2; i += 2) {
+            for (i = 0; i < 16 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
                vb[i].y = mo_vec[0].y;
             }
@@ -1016,7 +1007,7 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
             if(mb->mvfs[0][1]) mo_vec[0].y += 2;
             if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
 
-            for (i = 0; i < 24 * 2; i += 2) {
+            for (i = 0; i < 16 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
                vb[i].y = mo_vec[0].y;
                vb[i + 1].x = mo_vec[1].x;
@@ -1033,7 +1024,7 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
 
          assert(ref_vb && ref_vb[0]);
 
-         vb = ref_vb[0] + pos * 2 * 24;
+         vb = ref_vb[0] + pos * 2 * 16;
 
          if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
             mo_vec[0].x = mb->pmv[0][1][0];
@@ -1065,13 +1056,13 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
          }
 
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            for (i = 0; i < 24 * 2; i += 2) {
+            for (i = 0; i < 16 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
                vb[i].y = mo_vec[0].y;
             }
          }
          else {
-            for (i = 0; i < 24 * 2; i += 2) {
+            for (i = 0; i < 16 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
                vb[i].y = mo_vec[0].y;
                vb[i + 1].x = mo_vec[1].x;
@@ -1083,18 +1074,18 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
       }
       case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
       {
-         struct vert_stream_0 *vb = ycbcr_vb + pos * 24;
+         struct vert_stream_0 *vb = ycbcr_vb + pos * 16;
 
          if(mb->dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
             gen_block_verts(vb     , mb, 32, 32, 2, 1);
-            gen_block_verts(vb + 6 , mb, 16, 16, 2, 1);
-            gen_block_verts(vb + 12, mb, 8,   8, 2, 1);
-            gen_block_verts(vb + 18, mb, 4,   4, 2, 1);
+            gen_block_verts(vb + 4 , mb, 16, 16, 2, 1);
+            gen_block_verts(vb + 8 , mb, 8,   8, 2, 1);
+            gen_block_verts(vb + 12, mb, 4,   4, 2, 1);
          } else {
             gen_block_verts(vb     , mb, 32, 8, 2, 1);
-            gen_block_verts(vb + 6 , mb, 16, 4, 2, 1);
-            gen_block_verts(vb + 12, mb, 32, 8, 2, 1);
-            gen_block_verts(vb + 18, mb, 16, 4, 2, 1);
+            gen_block_verts(vb + 4 , mb, 16, 4, 2, 1);
+            gen_block_verts(vb + 8 , mb, 32, 8, 2, 1);
+            gen_block_verts(vb + 12, mb, 16, 4, 2, 1);
          }
 
          break;
@@ -1205,9 +1196,9 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->i_vs);
       r->pipe->bind_fs_state(r->pipe, r->i_fs);
 
-      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 24;
+      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_INTRA] * 16);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 16;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
@@ -1220,9 +1211,9 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
 
-      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 24;
+      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 16);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 16;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
@@ -1235,9 +1226,9 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
 
-      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 24;
+      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 16);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 16;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
@@ -1250,9 +1241,9 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
 
-      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 24;
+      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 16);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 16;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
@@ -1265,9 +1256,9 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
       r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
 
-      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 24;
+      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 16);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 16;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
@@ -1282,9 +1273,9 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
       r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
 
-      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 24;
+      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 16);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 16;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
@@ -1299,9 +1290,9 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
       r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
 
-      util_draw_arrays(r->pipe, PIPE_PRIM_TRIANGLES, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 24;
+      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
+                       num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 16);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 16;
    }
 
    r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
-- 
cgit v1.2.3


From 35a8efe5773909e31e36e5e0e379ae7a30913b7b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 13 Nov 2010 16:56:59 +0100
Subject: [g3dvl] move empty block handling completely into shaders

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 206 ++++++++++++++---------
 1 file changed, 126 insertions(+), 80 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index ddfd0b751f9..aae549cac49 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -63,7 +63,7 @@ struct vert_stream_0
       float luma_eb;
       float cb_eb;
       float cr_eb;
-   } field[2];
+   } field[2][2];
 
    float interlaced;
 };
@@ -72,8 +72,10 @@ enum VS_INPUT
 {
    VS_I_RECT,
    VS_I_VPOS,
-   VS_I_EB0,
-   VS_I_EB1,
+   VS_I_EB_0_0,
+   VS_I_EB_0_1,
+   VS_I_EB_1_0,
+   VS_I_EB_1_1,
    VS_I_INTERLACED,
    VS_I_MV0,
    VS_I_MV1,
@@ -90,8 +92,11 @@ enum VS_OUTPUT
    VS_O_TEX0,
    VS_O_TEX1,
    VS_O_TEX2,
-   VS_O_EB0,
-   VS_O_EB1,
+   VS_O_EB_0_0,
+   VS_O_EB_0_1,
+   VS_O_EB_1_0,
+   VS_O_EB_1_1,
+   VS_O_INTERLACED,
    VS_O_MV0,
    VS_O_MV1,
    VS_O_MV2,
@@ -124,9 +129,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 {
    struct ureg_program *shader;
    struct ureg_src norm, mbs;
-   struct ureg_src vrect, vpos, eb[2], interlaced, vmv[4];
+   struct ureg_src vrect, vpos, eb[2][2], interlaced, vmv[4];
    struct ureg_dst scale, t_vpos, t_vtex;
-   struct ureg_dst o_vpos, o_vtex[3], o_eb[2], o_vmv[4], o_line;
+   struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2][2], o_interlaced, o_vmv[4];
    unsigned i, j, count, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -142,8 +147,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
-   eb[0] = ureg_DECL_vs_input(shader, VS_I_EB0);
-   eb[1] = ureg_DECL_vs_input(shader, VS_I_EB1);
+   eb[0][0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0);
+   eb[1][0] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
+   eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
+   eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
    interlaced = ureg_DECL_vs_input(shader, VS_I_INTERLACED);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
@@ -151,8 +158,11 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
    o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
    o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
-   o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB0);
-   o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB1);
+   o_eb[0][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_0);
+   o_eb[0][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_1);
+   o_eb[1][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_0);
+   o_eb[1][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_1);
+   o_interlaced = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED);
    
    count=0;
    for (i = 0; i < ref_frames; ++i) {
@@ -175,8 +185,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
     * o_vpos.xy = t_vpos
     * o_vpos.zw = vpos
     *
-    * o_line.x = 1
-    * o_line.y = vpos.y * 8
+    * o_line = vpos * 8
     *
     * if(interlaced) {
     *    t_vtex.x = vrect.x
@@ -205,8 +214,8 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
-   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f));
-   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
+   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_XY), vrect, 
+      ureg_imm2f(shader, MACROBLOCK_WIDTH / 2, MACROBLOCK_HEIGHT / 2));
 
    ureg_IF(shader, interlaced, &label);
 
@@ -225,8 +234,12 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_ENDIF(shader);
    ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
-   ureg_MOV(shader, o_eb[0], eb[0]);
-   ureg_MOV(shader, o_eb[1], eb[1]);
+   ureg_MOV(shader, o_eb[0][0], eb[0][0]);
+   ureg_MOV(shader, o_eb[0][1], eb[0][1]);
+   ureg_MOV(shader, o_eb[1][0], eb[1][0]);
+   ureg_MOV(shader, o_eb[1][1], eb[1][1]);
+
+   ureg_MOV(shader, o_interlaced, interlaced);
 
    if(count > 0) {
       ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
@@ -255,11 +268,13 @@ calc_field(struct ureg_program *shader)
    /*
     * line going from 0 to 8 in steps of 0.5
     *
-    * tmp = fraction(line)
-    * tmp = tmp >= 0.5 ? 1 : 0
+    * tmp.z = fraction(line.y)
+    * tmp.z = tmp.z >= 0.5 ? 1 : 0
+    * tmp.xy = line > 4 ? 1 : 0
     */
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(line, TGSI_SWIZZLE_Y));
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line, ureg_imm2f(shader, BLOCK_WIDTH / 2, BLOCK_HEIGHT / 2));
 
    return tmp;
 }
@@ -267,20 +282,26 @@ calc_field(struct ureg_program *shader)
 static struct ureg_dst
 fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src tc[3], eb[2];
+   struct ureg_src tc[3], eb[2][2], interlaced;
    struct ureg_src sampler[3];
-   struct ureg_dst texel, tmp;
-   unsigned i, label;
+   struct ureg_dst texel, t_tc, t_field, tmp;
+   unsigned i, l_interlaced, l_y, l_x;
 
    texel = ureg_DECL_temporary(shader);
+   t_tc = ureg_DECL_temporary(shader);
+   t_field = ureg_DECL_temporary(shader);
    tmp = ureg_DECL_temporary(shader);
 
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
    tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
 
-   eb[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB0, TGSI_INTERPOLATE_LINEAR);
-   eb[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB1, TGSI_INTERPOLATE_LINEAR);
+   eb[0][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_0, TGSI_INTERPOLATE_CONSTANT);
+   eb[0][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_1, TGSI_INTERPOLATE_CONSTANT);
+   eb[1][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_0, TGSI_INTERPOLATE_CONSTANT);
+   eb[1][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_1, TGSI_INTERPOLATE_CONSTANT);
+
+   interlaced = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED, TGSI_INTERPOLATE_CONSTANT);
 
    for (i = 0; i < 3; ++i)  {
       sampler[i] = ureg_DECL_sampler(shader, i);
@@ -291,25 +312,45 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
     * texel.cb = tex(tc[2], sampler[1])
     * texel.cr = tex(tc[2], sampler[2])
     */
+   ureg_MOV(shader, ureg_writemask(t_field, TGSI_WRITEMASK_XY), ureg_src(field));
+   ureg_IF(shader, interlaced, &l_interlaced);
+      ureg_MOV(shader, ureg_writemask(t_field, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z));
+   ureg_ENDIF(shader);
+
    for (i = 0; i < 3; ++i) {
       if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
-         ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
-            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[1]);
-            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(eb[1], TGSI_SWIZZLE_X + i));
-         ureg_ELSE(shader, &label);
-            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[0]);
-            ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(eb[0], TGSI_SWIZZLE_X + i));
+         ureg_IF(shader, ureg_scalar(ureg_src(t_field), TGSI_SWIZZLE_Y), &l_y);
+            ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[1]);
+
+            ureg_IF(shader, ureg_scalar(ureg_src(t_field), TGSI_SWIZZLE_X), &l_x);
+               ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[1][1], TGSI_SWIZZLE_X + i));
+            ureg_ELSE(shader, &l_x);
+               ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[1][0], TGSI_SWIZZLE_X + i));
+            ureg_ENDIF(shader);
+
+         ureg_ELSE(shader, &l_y);
+            ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[0]);
+
+            ureg_IF(shader, ureg_scalar(ureg_src(t_field), TGSI_SWIZZLE_X), &l_x);
+               ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[0][1], TGSI_SWIZZLE_X + i));
+            ureg_ELSE(shader, &l_x);
+               ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[0][0], TGSI_SWIZZLE_X + i));
+            ureg_ENDIF(shader);
+
          ureg_ENDIF(shader);
+
       } else {
-         ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), tc[2]);
-         ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(eb[0], TGSI_SWIZZLE_X + i));
+         ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[2]);
+         ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[0][0], TGSI_SWIZZLE_X + i));
       }
 
       /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(tmp), sampler[i]);
+      ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]);
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
    }
 
+   ureg_release_temporary(shader, t_field);
+   ureg_release_temporary(shader, t_tc);
    ureg_release_temporary(shader, tmp);
 
    return texel;
@@ -413,7 +454,7 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
 
-   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
+   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z), &label);
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[1], sampler);
    ureg_ELSE(shader, &label);
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[0], sampler);
@@ -511,7 +552,7 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
 
-   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
+   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z), &label);
       ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
       ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[3], sampler[1]);
    ureg_ELSE(shader, &label);
@@ -800,20 +841,32 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[VS_I_VPOS].vertex_buffer_index = 1;
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* y, cr, cb z-coordinate element top field */
-   vertex_elems[VS_I_EB0].src_offset = sizeof(struct vertex2f);
-   vertex_elems[VS_I_EB0].instance_divisor = 0;
-   vertex_elems[VS_I_EB0].vertex_buffer_index = 1;
-   vertex_elems[VS_I_EB0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
-
-   /* y, cr, cb z-coordinate element bottom field */
-   vertex_elems[VS_I_EB1].src_offset = sizeof(struct vertex2f) + sizeof(float) * 3;
-   vertex_elems[VS_I_EB1].instance_divisor = 0;
-   vertex_elems[VS_I_EB1].vertex_buffer_index = 1;
-   vertex_elems[VS_I_EB1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   /* y, cr, cb z-coordinate element top left block */
+   vertex_elems[VS_I_EB_0_0].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_EB_0_0].instance_divisor = 0;
+   vertex_elems[VS_I_EB_0_0].vertex_buffer_index = 1;
+   vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+
+   /* y, cr, cb z-coordinate element top right block */
+   vertex_elems[VS_I_EB_0_1].src_offset = sizeof(struct vertex2f) + sizeof(float) * 3;
+   vertex_elems[VS_I_EB_0_1].instance_divisor = 0;
+   vertex_elems[VS_I_EB_0_1].vertex_buffer_index = 1;
+   vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+
+   /* y, cr, cb z-coordinate element bottom left block */
+   vertex_elems[VS_I_EB_1_0].src_offset = sizeof(struct vertex2f) + sizeof(float) * 6;
+   vertex_elems[VS_I_EB_1_0].instance_divisor = 0;
+   vertex_elems[VS_I_EB_1_0].vertex_buffer_index = 1;
+   vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+
+   /* y, cr, cb z-coordinate element bottom right block */
+   vertex_elems[VS_I_EB_1_1].src_offset = sizeof(struct vertex2f) + sizeof(float) * 9;
+   vertex_elems[VS_I_EB_1_1].instance_divisor = 0;
+   vertex_elems[VS_I_EB_1_1].vertex_buffer_index = 1;
+   vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
    /* progressive=1.0f interlaced=0.0f */
-   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(struct vertex2f) + sizeof(float) * 6;
+   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(struct vertex2f) + sizeof(float) * 12;
    vertex_elems[VS_I_INTERLACED].instance_divisor = 0;
    vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 1;
    vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
@@ -842,9 +895,9 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[VS_I_MV3].vertex_buffer_index = 3;
    vertex_elems[VS_I_MV3].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 5, vertex_elems);
-   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
-   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 9, vertex_elems);
+   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
+   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 9, vertex_elems);
+   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 11, vertex_elems);
 
    r->vs_const_buf = pipe_buffer_create
    (
@@ -924,8 +977,7 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
 }
 
 void
-gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
-                unsigned luma_mask_0, unsigned luma_mask_1, unsigned cb_mask, unsigned cr_mask)
+gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb)
 {
    unsigned cbp = mb->cbp;
    unsigned i;
@@ -936,26 +988,20 @@ gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb,
    v.pos.x = mb->mbx;
    v.pos.y = mb->mby;
 
-   v.field[0].luma_eb = (cbp & luma_mask_0) ? 0.0f : -1.0f;
-   v.field[1].luma_eb = (cbp & luma_mask_1) ? 0.0f : -1.0f;
+   v.field[0][0].luma_eb = cbp & 32 ? 0.0f : -1.0f;
+   v.field[0][1].luma_eb = cbp & 16 ? 0.0f : -1.0f;
+   v.field[1][0].luma_eb = cbp & 8 ? 0.0f : -1.0f;
+   v.field[1][1].luma_eb = cbp & 4 ? 0.0f : -1.0f;
 
-   if (cbp & cb_mask) {
-      v.field[0].cb_eb = 0.0f;
-      v.field[1].cb_eb = 0.0f;
-   }
-   else {
-      v.field[0].cb_eb = -1.0f;
-      v.field[1].cb_eb = -1.0f;
-   }
+   v.field[0][0].cb_eb = cbp & 2 ? 0.0f : -1.0f;
+   v.field[0][1].cb_eb = cbp & 2 ? 0.0f : -1.0f;
+   v.field[1][0].cb_eb = cbp & 2 ? 0.0f : -1.0f;
+   v.field[1][1].cb_eb = cbp & 2 ? 0.0f : -1.0f;
 
-   if (cbp & cr_mask) {
-      v.field[0].cr_eb = 0.0f;
-      v.field[1].cr_eb = 0.0f;
-   }
-   else {
-      v.field[0].cr_eb = -1.0f;
-      v.field[1].cr_eb = -1.0f;
-   }
+   v.field[0][0].cr_eb = cbp & 1 ? 0.0f : -1.0f;
+   v.field[0][1].cr_eb = cbp & 1 ? 0.0f : -1.0f;
+   v.field[1][0].cr_eb = cbp & 1 ? 0.0f : -1.0f;
+   v.field[1][1].cr_eb = cbp & 1 ? 0.0f : -1.0f;
    
    v.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
 
@@ -1077,15 +1123,15 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
          struct vert_stream_0 *vb = ycbcr_vb + pos * 16;
 
          if(mb->dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
-            gen_block_verts(vb     , mb, 32, 32, 2, 1);
-            gen_block_verts(vb + 4 , mb, 16, 16, 2, 1);
-            gen_block_verts(vb + 8 , mb, 8,   8, 2, 1);
-            gen_block_verts(vb + 12, mb, 4,   4, 2, 1);
+            gen_block_verts(vb     , mb);
+            gen_block_verts(vb + 4 , mb);
+            gen_block_verts(vb + 8 , mb);
+            gen_block_verts(vb + 12, mb);
          } else {
-            gen_block_verts(vb     , mb, 32, 8, 2, 1);
-            gen_block_verts(vb + 4 , mb, 16, 4, 2, 1);
-            gen_block_verts(vb + 8 , mb, 32, 8, 2, 1);
-            gen_block_verts(vb + 12, mb, 16, 4, 2, 1);
+            gen_block_verts(vb     , mb);
+            gen_block_verts(vb + 4 , mb);
+            gen_block_verts(vb + 8 , mb);
+            gen_block_verts(vb + 12, mb);
          }
 
          break;
-- 
cgit v1.2.3


From 0bc51ba484912e0b1c51922d409ae2645594e7ec Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 13 Nov 2010 17:16:27 +0100
Subject: [g3dvl] switch to using macroblock vertices

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 132 ++++++++++-------------
 1 file changed, 54 insertions(+), 78 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index aae549cac49..6195eafcc56 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -117,11 +117,8 @@ enum MACROBLOCK_TYPE
 };
 
 /* vertices for four quads covering the blocks */
-static const struct vertex2f const_quads[4][4] = {
-   { {0.0f, 0.0f}, {0.5f, 0.0f}, {0.5f, 0.5f}, {0.0f, 0.5f} },
-   { {0.5f, 0.0f}, {1.0f, 0.0f}, {1.0f, 0.5f}, {0.5f, 0.5f} },
-   { {0.0f, 0.5f}, {0.5f, 0.5f}, {0.5f, 1.0f}, {0.0f, 1.0f} },
-   { {0.5f, 0.5f}, {1.0f, 0.5f}, {1.0f, 1.0f}, {0.5f, 1.0f} },
+static const struct vertex2f const_quad[4] = {
+   {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
 };
 
 static void *
@@ -794,36 +791,36 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    }
 
    r->vertex_bufs.individual.rect.stride = sizeof(struct vertex2f);
-   r->vertex_bufs.individual.rect.max_index = 16 * r->macroblocks_per_batch - 1;
+   r->vertex_bufs.individual.rect.max_index = 4 * r->macroblocks_per_batch - 1;
    r->vertex_bufs.individual.rect.buffer_offset = 0;
    r->vertex_bufs.individual.rect.buffer = pipe_buffer_create
    (
       r->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex2f) * 16 * r->macroblocks_per_batch
+      sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch
    );
 
    r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vert_stream_0);
-   r->vertex_bufs.individual.ycbcr.max_index = 16 * r->macroblocks_per_batch - 1;
+   r->vertex_bufs.individual.ycbcr.max_index = 4 * r->macroblocks_per_batch - 1;
    r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
    /* XXX: Create with usage DYNAMIC or STREAM */
    r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
    (
       r->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vert_stream_0) * 16 * r->macroblocks_per_batch
+      sizeof(struct vert_stream_0) * 4 * r->macroblocks_per_batch
    );
 
    for (i = 0; i < 2; ++i) {
       r->vertex_bufs.individual.ref[i].stride = sizeof(struct vertex2f) * 2;
-      r->vertex_bufs.individual.ref[i].max_index = 16 * r->macroblocks_per_batch - 1;
+      r->vertex_bufs.individual.ref[i].max_index = 4 * r->macroblocks_per_batch - 1;
       r->vertex_bufs.individual.ref[i].buffer_offset = 0;
       /* XXX: Create with usage DYNAMIC or STREAM */
       r->vertex_bufs.individual.ref[i].buffer = pipe_buffer_create
       (
          r->pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
-         sizeof(struct vertex2f) * 2 * 16 * r->macroblocks_per_batch
+         sizeof(struct vertex2f) * 2 * 4 * r->macroblocks_per_batch
       );
    }
 
@@ -925,7 +922,7 @@ init_const_buffers(struct vl_mpeg12_mc_renderer *r)
    );
 
    for ( i = 0; i < r->macroblocks_per_batch; ++i)
-     memcpy(rect + i * 16, &const_quads, sizeof(const_quads));
+     memcpy(rect + i * 4, &const_quad, sizeof(const_quad));
 
    pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.rect.buffer, buf_transfer);
    
@@ -976,39 +973,6 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
    return -1;
 }
 
-void
-gen_block_verts(struct vert_stream_0 *vb, struct pipe_mpeg12_macroblock *mb)
-{
-   unsigned cbp = mb->cbp;
-   unsigned i;
-   struct vert_stream_0 v;
-
-   assert(vb);
-
-   v.pos.x = mb->mbx;
-   v.pos.y = mb->mby;
-
-   v.field[0][0].luma_eb = cbp & 32 ? 0.0f : -1.0f;
-   v.field[0][1].luma_eb = cbp & 16 ? 0.0f : -1.0f;
-   v.field[1][0].luma_eb = cbp & 8 ? 0.0f : -1.0f;
-   v.field[1][1].luma_eb = cbp & 4 ? 0.0f : -1.0f;
-
-   v.field[0][0].cb_eb = cbp & 2 ? 0.0f : -1.0f;
-   v.field[0][1].cb_eb = cbp & 2 ? 0.0f : -1.0f;
-   v.field[1][0].cb_eb = cbp & 2 ? 0.0f : -1.0f;
-   v.field[1][1].cb_eb = cbp & 2 ? 0.0f : -1.0f;
-
-   v.field[0][0].cr_eb = cbp & 1 ? 0.0f : -1.0f;
-   v.field[0][1].cr_eb = cbp & 1 ? 0.0f : -1.0f;
-   v.field[1][0].cr_eb = cbp & 1 ? 0.0f : -1.0f;
-   v.field[1][1].cr_eb = cbp & 1 ? 0.0f : -1.0f;
-   
-   v.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
-
-   for ( i = 0; i < 4; ++i )
-     memcpy(vb + i, &v, sizeof(v));
-}
-
 void
 gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
                      struct pipe_mpeg12_macroblock *mb, unsigned pos,
@@ -1033,13 +997,13 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
 
          assert(ref_vb && ref_vb[1]);
 
-         vb = ref_vb[1] + pos * 2 * 16;
+         vb = ref_vb[1] + pos * 2 * 4;
 
          mo_vec[0].x = mb->pmv[0][1][0];
          mo_vec[0].y = mb->pmv[0][1][1];
 
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            for (i = 0; i < 16 * 2; i += 2) {
+            for (i = 0; i < 4 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
                vb[i].y = mo_vec[0].y;
             }
@@ -1053,7 +1017,7 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
             if(mb->mvfs[0][1]) mo_vec[0].y += 2;
             if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
 
-            for (i = 0; i < 16 * 2; i += 2) {
+            for (i = 0; i < 4 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
                vb[i].y = mo_vec[0].y;
                vb[i + 1].x = mo_vec[1].x;
@@ -1070,7 +1034,7 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
 
          assert(ref_vb && ref_vb[0]);
 
-         vb = ref_vb[0] + pos * 2 * 16;
+         vb = ref_vb[0] + pos * 2 * 4;
 
          if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
             mo_vec[0].x = mb->pmv[0][1][0];
@@ -1102,13 +1066,13 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
          }
 
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            for (i = 0; i < 16 * 2; i += 2) {
+            for (i = 0; i < 4 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
                vb[i].y = mo_vec[0].y;
             }
          }
          else {
-            for (i = 0; i < 16 * 2; i += 2) {
+            for (i = 0; i < 4 * 2; i += 2) {
                vb[i].x = mo_vec[0].x;
                vb[i].y = mo_vec[0].y;
                vb[i + 1].x = mo_vec[1].x;
@@ -1120,19 +1084,31 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
       }
       case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
       {
-         struct vert_stream_0 *vb = ycbcr_vb + pos * 16;
-
-         if(mb->dct_type == PIPE_MPEG12_DCT_TYPE_FRAME) {
-            gen_block_verts(vb     , mb);
-            gen_block_verts(vb + 4 , mb);
-            gen_block_verts(vb + 8 , mb);
-            gen_block_verts(vb + 12, mb);
-         } else {
-            gen_block_verts(vb     , mb);
-            gen_block_verts(vb + 4 , mb);
-            gen_block_verts(vb + 8 , mb);
-            gen_block_verts(vb + 12, mb);
-         }
+         struct vert_stream_0 *vb = ycbcr_vb + pos * 4;
+         struct vert_stream_0 v;
+
+         v.pos.x = mb->mbx;
+         v.pos.y = mb->mby;
+
+         v.field[0][0].luma_eb = mb->cbp & 32 ? 0.0f : -1.0f;
+         v.field[0][1].luma_eb = mb->cbp & 16 ? 0.0f : -1.0f;
+         v.field[1][0].luma_eb = mb->cbp & 8 ? 0.0f : -1.0f;
+         v.field[1][1].luma_eb = mb->cbp & 4 ? 0.0f : -1.0f;
+
+         v.field[0][0].cb_eb = mb->cbp & 2 ? 0.0f : -1.0f;
+         v.field[0][1].cb_eb = mb->cbp & 2 ? 0.0f : -1.0f;
+         v.field[1][0].cb_eb = mb->cbp & 2 ? 0.0f : -1.0f;
+         v.field[1][1].cb_eb = mb->cbp & 2 ? 0.0f : -1.0f;
+
+         v.field[0][0].cr_eb = mb->cbp & 1 ? 0.0f : -1.0f;
+         v.field[0][1].cr_eb = mb->cbp & 1 ? 0.0f : -1.0f;
+         v.field[1][0].cr_eb = mb->cbp & 1 ? 0.0f : -1.0f;
+         v.field[1][1].cr_eb = mb->cbp & 1 ? 0.0f : -1.0f;
+   
+         v.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
+
+         for ( i = 0; i < 4; ++i )
+            memcpy(vb + i, &v, sizeof(v));
 
          break;
       }
@@ -1243,8 +1219,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_fs_state(r->pipe, r->i_fs);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_INTRA] * 16);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 16;
+                       num_macroblocks[MACROBLOCK_TYPE_INTRA] * 4);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 4;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
@@ -1258,8 +1234,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 16);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 16;
+                       num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 4);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 4;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
@@ -1273,8 +1249,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 16);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 16;
+                       num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 4);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 4;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
@@ -1288,8 +1264,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 16);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 16;
+                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 4);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 4;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
@@ -1303,8 +1279,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 16);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 16;
+                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 4);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 4;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
@@ -1320,8 +1296,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 16);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 16;
+                       num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 4);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 4;
    }
 
    if (num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
@@ -1337,8 +1313,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 16);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 16;
+                       num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 4);
+      vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 4;
    }
 
    r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
-- 
cgit v1.2.3


From 48e19e8f35b2bbf437f6dcfe3213098690b8c925 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 14 Nov 2010 02:17:39 +0100
Subject: [g3dvl] fix off-by-one bug in SCALE_FACTOR_16_TO_9

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 6195eafcc56..fb420cbe1f7 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -42,7 +42,7 @@
 #define MACROBLOCK_HEIGHT 16
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
-#define SCALE_FACTOR_16_TO_9 (32767.0f / 255.0f)
+#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
 struct vertex_shader_consts
 {
-- 
cgit v1.2.3


From 3886295a0cd1a51b1fa82fb076d826471d4697bb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 14 Nov 2010 20:14:25 +0100
Subject: [g3dvl] remove need for XVMC_INTRA_UNSIGNED

Move from unsigned to signed intra dct blocks.
You also need to update xf86-video-ati for this to work.
---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index fb420cbe1f7..5c0404b4b07 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -341,7 +341,7 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
          ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[0][0], TGSI_SWIZZLE_X + i));
       }
 
-      /* Nouveau can't writemask tex dst regs (yet?), do in two steps */
+      /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
       ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]);
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
    }
@@ -368,11 +368,13 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    /*
     * texel = fetch_ycbcr()
-    * fragment = texel * scale
+    * fragment = texel * scale + 0.5
     */
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
-   ureg_MUL(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X));
+   ureg_MAD(shader, fragment, ureg_src(texel), 
+            ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), 
+            ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
-- 
cgit v1.2.3


From 3cbe27a9888b94d1ab24b5e76ebd7563a7d8c6b8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 14 Nov 2010 20:35:02 +0100
Subject: [g3dvl] oops, missed this check

---
 src/gallium/state_trackers/xorg/xvmc/context.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index 5e4af9e555a..4869aac3a2e 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -213,8 +213,8 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Non-MPEG2/Mocomp acceleration unsupported.\n");
       return BadImplementation;
    }
-   if (!(surface_flags & XVMC_INTRA_UNSIGNED)) {
-      XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Signed intra unsupported.\n");
+   if (surface_flags & XVMC_INTRA_UNSIGNED) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Unsigned intra unsupported.\n");
       return BadImplementation;
    }
 
-- 
cgit v1.2.3


From 508a4a056c3140dc1f90b93acd46c06c30f7094e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 14 Nov 2010 23:16:49 +0100
Subject: [g3dvl] add skeleton and incomplete idct

---
 src/gallium/auxiliary/Makefile                   |   3 +-
 src/gallium/auxiliary/vl/vl_idct.c               | 505 +++++++++++++++++++++++
 src/gallium/auxiliary/vl/vl_idct.h               |  90 ++++
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  88 +---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   6 +-
 5 files changed, 616 insertions(+), 76 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_idct.c
 create mode 100644 src/gallium/auxiliary/vl/vl_idct.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 49ff1653e0e..07b3372c914 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -147,7 +147,8 @@ C_SOURCES = \
 	vl/vl_bitstream_parser.c \
 	vl/vl_mpeg12_mc_renderer.c \
 	vl/vl_compositor.c \
-	vl/vl_csc.c
+	vl/vl_csc.c \
+        vl/vl_idct.c
 
 GALLIVM_SOURCES = \
         gallivm/lp_bld_arit.c \
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
new file mode 100644
index 00000000000..ce535ad3862
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -0,0 +1,505 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vl_idct.h"
+#include <assert.h>
+#include <pipe/p_context.h>
+#include <pipe/p_screen.h>
+#include <util/u_inlines.h>
+#include <util/u_sampler.h>
+#include <util/u_format.h>
+#include <tgsi/tgsi_ureg.h>
+#include "vl_types.h"
+
+#define BLOCK_WIDTH 8
+#define BLOCK_HEIGHT 8
+#define SCALE_FACTOR_16_TO_12 (32768.0f / 2048.0f)
+#define SCALE_FACTOR_9_TO_16 (256.0f / 32768.0f)
+
+struct vertex_shader_consts
+{
+   struct vertex4f norm;
+};
+
+enum VS_INPUT
+{
+   VS_I_RECT,
+   VS_I_VPOS,
+
+   NUM_VS_INPUTS
+};
+
+enum VS_OUTPUT
+{
+   VS_O_VPOS,
+   VS_O_BLOCK,
+   VS_O_TEX,
+   VS_O_START,
+   VS_O_STEP
+};
+
+
+const float const_matrix[8][8] = {
+   {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.353553f,  0.3535530f },
+   {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
+   {  0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f,  0.191342f,  0.4619400f },
+   {  0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f,  0.2777850f,  0.4903930f,  0.097545f, -0.4157350f },
+   {  0.3535530f, -0.3535530f, -0.3535530f,  0.3535540f,  0.3535530f, -0.3535540f, -0.353553f,  0.3535530f },
+   {  0.2777850f, -0.4903930f,  0.0975452f,  0.4157350f, -0.4157350f, -0.0975451f,  0.490393f, -0.2777850f },
+   {  0.1913420f, -0.4619400f,  0.4619400f, -0.1913420f, -0.1913410f,  0.4619400f, -0.461940f,  0.1913420f },
+   {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f, -0.0975458f }
+};
+
+const float const_transpose[8][8] = {
+   {  0.3535530f,  0.4903930f,  0.4619400f,  0.4157350f,  0.3535530f,  0.2777850f,  0.191342f,  0.0975451f },
+   {  0.3535530f,  0.4157350f,  0.1913420f, -0.0975452f, -0.3535530f, -0.4903930f, -0.461940f, -0.2777850f },
+   {  0.3535530f,  0.2777850f, -0.1913420f, -0.4903930f, -0.3535530f,  0.0975452f,  0.461940f,  0.4157350f },
+   {  0.3535530f,  0.0975451f, -0.4619400f, -0.2777850f,  0.3535540f,  0.4157350f, -0.191342f, -0.4903930f },
+   {  0.3535530f, -0.0975452f, -0.4619400f,  0.2777850f,  0.3535530f, -0.4157350f, -0.191341f,  0.4903930f },
+   {  0.3535530f, -0.2777850f, -0.1913420f,  0.4903930f, -0.3535540f, -0.0975451f,  0.461940f, -0.4157350f },
+   {  0.3535530f, -0.4157350f,  0.1913420f,  0.0975450f, -0.3535530f,  0.4903930f, -0.461940f,  0.2777860f },
+   {  0.3535530f, -0.4903930f,  0.4619400f, -0.4157350f,  0.3535530f, -0.2777850f,  0.191342f, -0.0975458f }
+};
+
+static void *
+create_vert_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src norm, bs;
+   struct ureg_src vrect, vpos;
+   struct ureg_dst scale, t_vpos;
+   struct ureg_dst o_vpos, o_block, o_tex, o_start, o_step;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   norm = ureg_DECL_constant(shader, 0);
+   bs = ureg_imm2f(shader, BLOCK_WIDTH, BLOCK_HEIGHT);
+
+   scale = ureg_DECL_temporary(shader);
+   t_vpos = ureg_DECL_temporary(shader);
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+   /* only o_vpos is POSITION; both fragment shaders read the other four as GENERIC inputs */
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
+   o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
+   o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
+   o_step = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP);
+
+   /*
+    * Computes the output position and the varyings consumed by the fragment shaders:
+    * scale = norm * bs
+    * t_vpos = (vpos + vrect) * scale
+    * o_vpos.xy = t_vpos
+    * o_vpos.zw = vpos
+    *
+    * o_block = vrect
+    * o_tex = t_vpos
+    * o_start = vpos * scale
+    * o_step = norm
+    *
+    */
+   ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, bs);
+
+   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), ureg_src(scale));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+
+   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
+   ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, ureg_src(scale));
+   ureg_MOV(shader, ureg_writemask(o_step, TGSI_WRITEMASK_XY), norm);
+
+   ureg_release_temporary(shader, t_vpos);
+   ureg_release_temporary(shader, scale);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void
+matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
+           struct ureg_src tc[2], struct ureg_src sampler[2],
+           struct ureg_src start[2], struct ureg_src step[2],
+           float scale[2])
+{
+   struct ureg_dst t_tc[2], m[2][2], tmp[2];
+   unsigned i, j;
+
+   for(i = 0; i < 2; ++i) {
+      t_tc[i] = ureg_DECL_temporary(shader);
+      for(j = 0; j < 2; ++j)
+         m[i][j] = ureg_DECL_temporary(shader);
+      tmp[i] = ureg_DECL_temporary(shader);
+   }
+
+   /*
+    * m[0..1][k] = 8 texels of sampler[k] (k=0 stepped along x, k=1 along y), times scale[k] if != 1
+    * tmp[0..1] = dot4(m[0..1][0], m[0..1][1])
+    * dst.x = tmp[0] + tmp[1], i.e. one 8-element dot product
+    */
+   ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), start[0]);
+   ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_Y), tc[0]);
+
+   ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), tc[1]);
+   ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), start[1]);
+
+   for(i = 0; i < 2; ++i) {
+      for(j = 0; j < 4; ++j) {
+         /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
+         ureg_TEX(shader, tmp[0], TGSI_TEXTURE_2D, ureg_src(t_tc[0]), sampler[0]);
+         ureg_MOV(shader, ureg_writemask(m[i][0], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[0]), TGSI_SWIZZLE_X));
+
+         ureg_TEX(shader, tmp[1], TGSI_TEXTURE_2D, ureg_src(t_tc[1]), sampler[1]);
+         ureg_MOV(shader, ureg_writemask(m[i][1], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[1]), TGSI_SWIZZLE_X));
+
+         ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), ureg_src(t_tc[0]), step[0]);
+         ureg_ADD(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_src(t_tc[1]), step[1]);
+      }
+
+      if(scale[0] != 1.0f)
+         ureg_MUL(shader, m[i][0], ureg_src(m[i][0]), ureg_scalar(ureg_imm1f(shader, scale[0]), TGSI_SWIZZLE_X));
+
+      if(scale[1] != 1.0f)
+         ureg_MUL(shader, m[i][1], ureg_src(m[i][1]), ureg_scalar(ureg_imm1f(shader, scale[1]), TGSI_SWIZZLE_X));
+   }
+
+   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(m[0][0]), ureg_src(m[0][1]));
+   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(m[1][0]), ureg_src(m[1][1]));
+   ureg_ADD(shader, ureg_writemask(dst, TGSI_WRITEMASK_X), ureg_src(tmp[0]), ureg_src(tmp[1]));
+
+   for(i = 0; i < 2; ++i) {
+      ureg_release_temporary(shader, t_tc[i]);
+      for(j = 0; j < 2; ++j)
+         ureg_release_temporary(shader, m[i][j]);
+      ureg_release_temporary(shader, tmp[i]);
+   }
+}
+
+static void *
+create_transpose_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc[2], sampler[2];
+   struct ureg_src start[2], step[2];
+   struct ureg_dst fragment;
+   float scale[2];
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
+
+   start[0] = ureg_imm1f(shader, 0.0f);
+   start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
+
+   step[0] = ureg_imm1f(shader, 1.0f / BLOCK_HEIGHT);
+   step[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
+
+   sampler[0] = ureg_DECL_sampler(shader, 0);
+   sampler[1] = ureg_DECL_sampler(shader, 1);
+
+   scale[0] = 1.0f;
+   scale[1] = SCALE_FACTOR_16_TO_12;
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   matrix_mul(shader, fragment, tc, sampler, start, step, scale);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void *
+create_matrix_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc[2], sampler[2];
+   struct ureg_src start[2], step[2];
+   struct ureg_dst tmp, fragment;
+   float scale[2];
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   tmp = ureg_DECL_temporary(shader);
+
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
+
+   start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
+   start[1] = ureg_imm1f(shader, 0.0f);
+
+   step[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
+   step[1] = ureg_imm1f(shader, 1.0f / BLOCK_WIDTH);
+
+   sampler[0] = ureg_DECL_sampler(shader, 0);
+   sampler[1] = ureg_DECL_sampler(shader, 1);
+
+   scale[0] = 1.0f;
+   scale[1] = 1.0f;
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   matrix_mul(shader, tmp, tc, sampler, start, step, scale);
+   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_9_TO_16), TGSI_SWIZZLE_X));
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void
+xfer_buffers_map(struct vl_idct *idct)
+{
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      idct->destination->width0,
+      idct->destination->height0,
+      1
+   };
+
+   idct->tex_transfer = idct->pipe->get_transfer
+   (
+      idct->pipe, idct->destination,
+      u_subresource(0, 0),
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+
+   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
+}
+
+static void
+xfer_buffers_unmap(struct vl_idct *idct)
+{
+   idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
+   idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
+}
+
+static bool
+init_shaders(struct vl_idct *idct)
+{
+   assert(idct);
+
+   /* Create outside assert(): NDEBUG builds never evaluate assert() arguments. */
+   idct->vs = create_vert_shader(idct);
+   idct->transpose_fs = create_transpose_frag_shader(idct);
+   idct->matrix_fs = create_matrix_frag_shader(idct);
+   return idct->vs && idct->transpose_fs && idct->matrix_fs;
+}
+
+static void
+cleanup_shaders(struct vl_idct *idct)
+{
+   assert(idct);
+
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
+   idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
+   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
+}
+
+static bool
+init_buffers(struct vl_idct *idct)
+{
+   struct pipe_resource template;
+   struct pipe_sampler_view sampler_view;
+   struct pipe_vertex_element vertex_elems[2];
+
+   const unsigned max_blocks =
+      align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
+      align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
+      idct->destination->depth0;
+
+   unsigned i;
+
+   memset(&template, 0, sizeof(struct pipe_resource));
+   template.target = PIPE_TEXTURE_2D;
+   template.format = PIPE_FORMAT_R32_FLOAT;
+   template.last_level = 0;
+   template.width0 = 8;
+   template.height0 = 8;
+   template.depth0 = 1;
+   template.usage = PIPE_USAGE_IMMUTABLE;
+   template.bind = PIPE_BIND_SAMPLER_VIEW;
+   template.flags = 0;
+
+   idct->textures.individual.matrix = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+   idct->textures.individual.transpose = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+
+   template.format = idct->destination->format;
+   template.width0 = idct->destination->width0;
+   template.height0 = idct->destination->height0;
+   template.depth0 = idct->destination->depth0;
+   template.usage = PIPE_USAGE_DYNAMIC;
+   idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+
+   template.format = PIPE_FORMAT_R32_FLOAT;
+   template.usage = PIPE_USAGE_STATIC;
+   idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+
+   for (i = 0; i < 4; ++i) {
+      u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format);
+      idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
+   }
+
+   idct->quad.stride = sizeof(struct vertex2f);
+   idct->quad.max_index = 4 * max_blocks - 1;
+   idct->quad.buffer_offset = 0;
+   idct->quad.buffer = pipe_buffer_create
+   (
+      idct->pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      sizeof(struct vertex2f) * 4 * max_blocks
+   );
+
+   idct->pos.stride = sizeof(struct vertex2f);
+   idct->pos.max_index = 4 * max_blocks - 1;
+   idct->pos.buffer_offset = 0;
+   idct->pos.buffer = pipe_buffer_create
+   (
+      idct->pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      sizeof(struct vertex2f) * 4 * max_blocks
+   );
+
+   /* Rect element */
+   vertex_elems[0].src_offset = 0;
+   vertex_elems[0].instance_divisor = 0;
+   vertex_elems[0].vertex_buffer_index = 0;
+   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   /* Pos element */
+   vertex_elems[1].src_offset = 0;
+   vertex_elems[1].instance_divisor = 0;
+   vertex_elems[1].vertex_buffer_index = 1;
+   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
+
+   idct->vs_const_buf = pipe_buffer_create
+   (
+      idct->pipe->screen,
+      PIPE_BIND_CONSTANT_BUFFER,
+      sizeof(struct vertex_shader_consts)
+   );
+
+   return true;
+}
+
+static void
+cleanup_buffers(struct vl_idct *idct)
+{
+   unsigned i;
+
+   assert(idct);
+
+   pipe_resource_reference(&idct->vs_const_buf, NULL);
+
+   for (i = 0; i < 4; ++i) {
+      pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL);
+      pipe_resource_reference(&idct->textures.all[i], NULL);
+   }
+
+   idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+   pipe_resource_reference(&idct->quad.buffer, NULL);
+   pipe_resource_reference(&idct->pos.buffer, NULL);
+}
+
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst)
+{
+   assert(idct && pipe && dst);
+
+   idct->pipe = pipe;
+
+   idct->viewport.scale[0] = dst->width0;
+   idct->viewport.scale[1] = dst->height0;
+   idct->viewport.scale[2] = 1;
+   idct->viewport.scale[3] = 1;
+   idct->viewport.translate[0] = 0;
+   idct->viewport.translate[1] = 0;
+   idct->viewport.translate[2] = 0;
+   idct->viewport.translate[3] = 0;
+
+   idct->fb_state.width = dst->width0;
+   idct->fb_state.height = dst->height0;
+   idct->fb_state.nr_cbufs = 1;
+   idct->fb_state.zsbuf = NULL;
+
+   pipe_resource_reference(&idct->destination, dst);
+
+   if(!init_shaders(idct))
+      return false;
+
+   if(!init_buffers(idct)) {
+      cleanup_shaders(idct);
+      return false;
+   }
+
+   xfer_buffers_map(idct);
+
+   return true;
+}
+
+void vl_idct_cleanup(struct vl_idct *idct)
+{
+   cleanup_shaders(idct);
+   cleanup_buffers(idct);
+}
+
+void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
+{
+   unsigned tex_pitch;
+   short *texels;
+   unsigned i;
+
+   assert(idct);
+   assert(block);
+
+   tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format);
+   texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
+
+   for (i = 0; i < BLOCK_HEIGHT; ++i)
+      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
+}
+
+void vl_idct_flush(struct vl_idct *idct)
+{
+   xfer_buffers_unmap(idct);
+   // TODO
+   xfer_buffers_map(idct);
+}
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
new file mode 100644
index 00000000000..02a3250399f
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -0,0 +1,90 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_idct_h
+#define vl_idct_h
+
+#include <pipe/p_state.h>
+
+struct vl_idct
+{
+   struct pipe_context *pipe;
+
+   struct pipe_viewport_state viewport;
+   struct pipe_resource *vs_const_buf;
+   struct pipe_framebuffer_state fb_state;
+
+   struct pipe_resource *destination;
+
+   void *vertex_elems_state;
+
+   union
+   {
+      void *all[4];
+      struct {
+         void *matrix, *transpose;
+         void *source, *intermediate;
+      } individual;
+   } samplers;
+
+   union
+   {
+      struct pipe_sampler_view *all[4];
+      struct {
+         struct pipe_sampler_view *matrix, *transpose;
+         struct pipe_sampler_view *source, *intermediate;
+      } individual;
+   } sampler_views;
+
+   void *vs;
+   void *transpose_fs, *matrix_fs;
+
+   union
+   {
+      struct pipe_resource *all[4];
+      struct {
+         struct pipe_resource *matrix, *transpose;
+         struct pipe_resource *source, *intermediate;
+      } individual;
+   } textures;
+
+   struct pipe_vertex_buffer quad;
+   struct pipe_vertex_buffer pos;
+
+   struct pipe_transfer *tex_transfer;
+   short *texels;
+};
+
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst);
+
+void vl_idct_cleanup(struct vl_idct *idct);
+
+void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block);
+
+void vl_idct_flush(struct vl_idct *idct);
+
+#endif
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 5c0404b4b07..7bc7ba91f94 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -572,47 +572,6 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static void
-xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
-{
-   unsigned i;
-
-   assert(r);
-
-   for (i = 0; i < 3; ++i) {
-      struct pipe_box rect =
-      {
-         0, 0, 0,
-         r->textures.all[i]->width0,
-         r->textures.all[i]->height0,
-         1
-      };
-
-      r->tex_transfer[i] = r->pipe->get_transfer
-      (
-         r->pipe, r->textures.all[i],
-         u_subresource(0, 0),
-         PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-         &rect
-      );
-
-      r->texels[i] = r->pipe->transfer_map(r->pipe, r->tex_transfer[i]);
-   }
-}
-
-static void
-xfer_buffers_unmap(struct vl_mpeg12_mc_renderer *r)
-{
-   unsigned i;
-
-   assert(r);
-
-   for (i = 0; i < 3; ++i) {
-      r->pipe->transfer_unmap(r->pipe, r->tex_transfer[i]);
-      r->pipe->transfer_destroy(r->pipe, r->tex_transfer[i]);
-   }
-}
-
 static bool
 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
@@ -1209,7 +1168,10 @@ flush(struct vl_mpeg12_mc_renderer *r)
    assert(r);
    assert(r->num_macroblocks == r->macroblocks_per_batch);
 
-   xfer_buffers_unmap(r);
+   vl_idct_flush(&r->idct_y);
+   vl_idct_flush(&r->idct_cr);
+   vl_idct_flush(&r->idct_cb);
+
    gen_macroblock_stream(r, num_macroblocks);
 
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
@@ -1322,7 +1284,6 @@ flush(struct vl_mpeg12_mc_renderer *r)
    r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
 
    r->num_macroblocks = 0;
-   xfer_buffers_map(r);
 }
 
 static void
@@ -1352,40 +1313,20 @@ update_render_target(struct vl_mpeg12_mc_renderer *r)
    r->pipe->set_viewport_state(r->pipe, &r->viewport);
 }
 
-static void
-grab_coded_block(short *src, short *dst, unsigned dst_pitch)
-{
-   unsigned y;
-
-   assert(src);
-   assert(dst);
-
-   for (y = 0; y < BLOCK_HEIGHT; ++y)
-      memcpy(dst + y * dst_pitch, src + y * BLOCK_WIDTH, BLOCK_WIDTH * 2);
-}
-
 static void
 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
             enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
 {
-   unsigned tex_pitch;
-   short *texels;
    unsigned tb = 0, sb = 0;
-   unsigned mbpx = mbx * MACROBLOCK_WIDTH, mbpy = mby * MACROBLOCK_HEIGHT;
    unsigned x, y;
 
    assert(r);
    assert(blocks);
 
-   tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->resource->format);
-   texels = r->texels[0] + mbpy * tex_pitch + mbpx;
-
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
          if ((cbp >> (5 - tb)) & 1) {
-            grab_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT,
-                             texels + y * tex_pitch * BLOCK_HEIGHT +
-                             x * BLOCK_WIDTH, tex_pitch);
+            vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT);
             ++sb;
          }
       }
@@ -1394,15 +1335,12 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
    /* TODO: Implement 422, 444 */
    assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
-   mbpx /= 2;
-   mbpy /= 2;
-
    for (tb = 0; tb < 2; ++tb) {
-      tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->resource->format);
-      texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
-
       if ((cbp >> (1 - tb)) & 1) {
-         grab_coded_block(blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT, texels, tex_pitch);
+         if(tb == 0)
+            vl_idct_add_block(&r->idct_cb, mbx, mby, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT);
+         else
+            vl_idct_add_block(&r->idct_cr, mbx, mby, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT);
          ++sb;
       }
    }
@@ -1499,7 +1437,9 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->future = NULL;
    renderer->num_macroblocks = 0;
 
-   xfer_buffers_map(renderer);
+   vl_idct_init(&renderer->idct_y, pipe, renderer->textures.individual.y);
+   vl_idct_init(&renderer->idct_cr, pipe, renderer->textures.individual.cr);
+   vl_idct_init(&renderer->idct_cb, pipe, renderer->textures.individual.cb);
 
    return true;
 }
@@ -1509,7 +1449,9 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 {
    assert(renderer);
 
-   xfer_buffers_unmap(renderer);
+   vl_idct_cleanup(&renderer->idct_y);
+   vl_idct_cleanup(&renderer->idct_cr);
+   vl_idct_cleanup(&renderer->idct_cb);
 
    util_delete_keymap(renderer->texview_map, renderer->pipe);
    cleanup_pipe_state(renderer);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 50c38f9ff10..ed48b5b6b45 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -32,6 +32,7 @@
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
 #include "vl_types.h"
+#include "vl_idct.h"
 
 struct pipe_context;
 struct pipe_macroblock;
@@ -57,6 +58,9 @@ struct vl_mpeg12_mc_renderer
    struct pipe_viewport_state viewport;
    struct pipe_resource *vs_const_buf;
    struct pipe_framebuffer_state fb_state;
+
+   struct vl_idct idct_y, idct_cb, idct_cr;
+
    union
    {
       void *all[3];
@@ -94,8 +98,6 @@ struct vl_mpeg12_mc_renderer
    struct pipe_fence_handle **fence;
    unsigned num_macroblocks;
    struct pipe_mpeg12_macroblock *macroblock_buf;
-   struct pipe_transfer *tex_transfer[3];
-   short *texels[3];
 
    struct keymap *texview_map;
 };
-- 
cgit v1.2.3


From e639e1b83ea65985cd84d12dc120d77cab80ba9e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 16 Nov 2010 22:30:50 +0100
Subject: [g3dvl] next round of idct implementation

---
 src/gallium/auxiliary/vl/vl_idct.c               | 243 ++++++++++++++++++++---
 src/gallium/auxiliary/vl/vl_idct.h               |  22 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  12 +-
 src/gallium/drivers/r600/r600_state_inlines.h    |   1 +
 4 files changed, 237 insertions(+), 41 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index ce535ad3862..55e0751891a 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 #include "vl_idct.h"
+#include "util/u_draw.h"
 #include <assert.h>
 #include <pipe/p_context.h>
 #include <pipe/p_screen.h>
@@ -63,7 +64,7 @@ enum VS_OUTPUT
 };
 
 
-const float const_matrix[8][8] = {
+static const float const_matrix[8][8] = {
    {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.353553f,  0.3535530f },
    {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
    {  0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f,  0.191342f,  0.4619400f },
@@ -74,7 +75,7 @@ const float const_matrix[8][8] = {
    {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f, -0.0975458f }
 };
 
-const float const_transpose[8][8] = {
+static const float const_transpose[8][8] = {
    {  0.3535530f,  0.4903930f,  0.4619400f,  0.4157350f,  0.3535530f,  0.2777850f,  0.191342f,  0.0975451f },
    {  0.3535530f,  0.4157350f,  0.1913420f, -0.0975452f, -0.3535530f, -0.4903930f, -0.461940f, -0.2777850f },
    {  0.3535530f,  0.2777850f, -0.1913420f, -0.4903930f, -0.3535530f,  0.0975452f,  0.461940f,  0.4157350f },
@@ -85,6 +86,11 @@ const float const_transpose[8][8] = {
    {  0.3535530f, -0.4903930f,  0.4619400f, -0.4157350f,  0.3535530f, -0.2777850f,  0.191342f, -0.0975458f }
 };
 
+/* vertices for a quad covering a block */
+static const struct vertex2f const_quad[4] = {
+   {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
+};
+
 static void *
 create_vert_shader(struct vl_idct *idct)
 {
@@ -109,8 +115,8 @@ create_vert_shader(struct vl_idct *idct)
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
    o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
-   o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_TEX);
-   o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_START);
+   o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
+   o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
    o_step = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP);
 
    /*
@@ -228,14 +234,15 @@ create_transpose_frag_shader(struct vl_idct *idct)
    step[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
 
    sampler[0] = ureg_DECL_sampler(shader, 0);
-   sampler[1] = ureg_DECL_sampler(shader, 1);
+   sampler[1] = ureg_DECL_sampler(shader, 2);
 
    scale[0] = 1.0f;
    scale[1] = SCALE_FACTOR_16_TO_12;
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   matrix_mul(shader, fragment, tc, sampler, start, step, scale);
+   //matrix_mul(shader, fragment, tc, sampler, start, step, scale);
+   //ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
 
    ureg_END(shader);
 
@@ -266,7 +273,7 @@ create_matrix_frag_shader(struct vl_idct *idct)
    step[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
    step[1] = ureg_imm1f(shader, 1.0f / BLOCK_WIDTH);
 
-   sampler[0] = ureg_DECL_sampler(shader, 0);
+   sampler[0] = ureg_DECL_sampler(shader, 3);
    sampler[1] = ureg_DECL_sampler(shader, 1);
 
    scale[0] = 1.0f;
@@ -274,8 +281,10 @@ create_matrix_frag_shader(struct vl_idct *idct)
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   matrix_mul(shader, tmp, tc, sampler, start, step, scale);
-   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_9_TO_16), TGSI_SWIZZLE_X));
+//   matrix_mul(shader, tmp, tc, sampler, start, step, scale);
+//   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_9_TO_16), TGSI_SWIZZLE_X));
+   ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc[0], sampler[0]);
+   //ureg_MUL(shader, , ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, 1.0f));
 
    ureg_END(shader);
 
@@ -295,18 +304,32 @@ xfer_buffers_map(struct vl_idct *idct)
 
    idct->tex_transfer = idct->pipe->get_transfer
    (
+#if 0
+      idct->pipe, idct->textures.individual.intermediate,
+#else
       idct->pipe, idct->destination,
+#endif
       u_subresource(0, 0),
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
    );
 
    idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
+
+   idct->vectors = pipe_buffer_map
+   (
+      idct->pipe,
+      idct->vertex_bufs.individual.pos.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &idct->vec_transfer
+   );
 }
 
 static void
 xfer_buffers_unmap(struct vl_idct *idct)
 {
+   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, idct->vec_transfer);
+
    idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
    idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
 }
@@ -368,8 +391,8 @@ init_buffers(struct vl_idct *idct)
    template.usage = PIPE_USAGE_DYNAMIC;
    idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
-   template.format = PIPE_FORMAT_R32_FLOAT;
-   template.usage = PIPE_USAGE_STATIC;
+   //template.format = PIPE_FORMAT_R32_FLOAT;
+   //template.usage = PIPE_USAGE_STATIC;
    idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
    for (i = 0; i < 4; ++i) {
@@ -377,20 +400,20 @@ init_buffers(struct vl_idct *idct)
       idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
    }
 
-   idct->quad.stride = sizeof(struct vertex2f);
-   idct->quad.max_index = 4 * max_blocks - 1;
-   idct->quad.buffer_offset = 0;
-   idct->quad.buffer = pipe_buffer_create
+   idct->vertex_bufs.individual.quad.stride = sizeof(struct vertex2f);
+   idct->vertex_bufs.individual.quad.max_index = 4 * max_blocks - 1;
+   idct->vertex_bufs.individual.quad.buffer_offset = 0;
+   idct->vertex_bufs.individual.quad.buffer = pipe_buffer_create
    (
       idct->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
       sizeof(struct vertex2f) * 4 * max_blocks
    );
 
-   idct->pos.stride = sizeof(struct vertex2f);
-   idct->pos.max_index = 4 * max_blocks - 1;
-   idct->pos.buffer_offset = 0;
-   idct->pos.buffer = pipe_buffer_create
+   idct->vertex_bufs.individual.pos.stride = sizeof(struct vertex2f);
+   idct->vertex_bufs.individual.pos.max_index = 4 * max_blocks - 1;
+   idct->vertex_bufs.individual.pos.buffer_offset = 0;
+   idct->vertex_bufs.individual.pos.buffer = pipe_buffer_create
    (
       idct->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
@@ -436,18 +459,80 @@ cleanup_buffers(struct vl_idct *idct)
    }
 
    idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-   pipe_resource_reference(&idct->quad.buffer, NULL);
-   pipe_resource_reference(&idct->pos.buffer, NULL);
+   pipe_resource_reference(&idct->vertex_bufs.individual.quad.buffer, NULL);
+   pipe_resource_reference(&idct->vertex_bufs.individual.pos.buffer, NULL);
 }
 
-bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst)
+static void
+init_constants(struct vl_idct *idct)
 {
-   assert(idct && pipe && dst);
+   struct pipe_transfer *buf_transfer;
+   struct vertex_shader_consts *vs_consts;
+   struct vertex2f *v;
 
-   idct->pipe = pipe;
+   unsigned i;
+
+   v = pipe_buffer_map
+   (
+      idct->pipe,
+      idct->vertex_bufs.individual.quad.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   for ( i = 0; i <= idct->vertex_bufs.individual.quad.max_index; i += 4)
+     memcpy(v + i, &const_quad, sizeof(const_quad));
+
+   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
 
-   idct->viewport.scale[0] = dst->width0;
-   idct->viewport.scale[1] = dst->height0;
+
+   v = pipe_buffer_map
+   (
+      idct->pipe,
+      idct->textures.individual.matrix,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   memcpy(v, &const_matrix, sizeof(const_matrix));
+
+   pipe_buffer_unmap(idct->pipe, idct->textures.individual.matrix, buf_transfer);
+
+   v = pipe_buffer_map
+   (
+      idct->pipe,
+      idct->textures.individual.transpose,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   memcpy(v, &const_transpose, sizeof(const_transpose));
+
+   pipe_buffer_unmap(idct->pipe, idct->textures.individual.transpose, buf_transfer);
+
+   vs_consts = pipe_buffer_map
+   (
+      idct->pipe, idct->vs_const_buf,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   vs_consts->norm.x = 1.0f / idct->destination->width0;
+   vs_consts->norm.y = 1.0f / idct->destination->height0;
+
+   pipe_buffer_unmap(idct->pipe, idct->vs_const_buf, buf_transfer);
+}
+
+static void
+init_state(struct vl_idct *idct)
+{
+   struct pipe_sampler_state sampler;
+   unsigned i;
+
+   idct->num_blocks = 0;
+
+   idct->viewport.scale[0] = idct->destination->width0;
+   idct->viewport.scale[1] = idct->destination->height0;
    idct->viewport.scale[2] = 1;
    idct->viewport.scale[3] = 1;
    idct->viewport.translate[0] = 0;
@@ -455,13 +540,50 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_r
    idct->viewport.translate[2] = 0;
    idct->viewport.translate[3] = 0;
 
-   idct->fb_state.width = dst->width0;
-   idct->fb_state.height = dst->height0;
+   idct->fb_state.width = idct->destination->width0;
+   idct->fb_state.height = idct->destination->height0;
    idct->fb_state.nr_cbufs = 1;
    idct->fb_state.zsbuf = NULL;
 
+   for (i = 0; i < 4; ++i) {
+      memset(&sampler, 0, sizeof(sampler));
+      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+      sampler.compare_func = PIPE_FUNC_ALWAYS;
+      sampler.normalized_coords = 1;
+      /*sampler.shadow_ambient = ; */
+      /*sampler.lod_bias = ; */
+      sampler.min_lod = 0;
+      /*sampler.max_lod = ; */
+      /*sampler.border_color[0] = ; */
+      /*sampler.max_anisotropy = ; */
+      idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
+   }
+}
+
+static void
+cleanup_state(struct vl_idct *idct)
+{
+   unsigned i;
+
+   for (i = 0; i < 4; ++i)
+      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
+}
+
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst)
+{
+   assert(idct && pipe && dst);
+
+   idct->pipe = pipe;
    pipe_resource_reference(&idct->destination, dst);
 
+   init_state(idct);
+
    if(!init_shaders(idct))
       return false;
 
@@ -470,6 +592,15 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_r
       return false;
    }
 
+   idct->surfaces.intermediate = idct->pipe->screen->get_tex_surface(
+      idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, 0,
+      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+
+   idct->surfaces.destination = idct->pipe->screen->get_tex_surface(
+      idct->pipe->screen, idct->destination, 0, 0, 0,
+      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+
+   init_constants(idct);
    xfer_buffers_map(idct);
 
    return true;
@@ -477,29 +608,83 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_r
 
 void vl_idct_cleanup(struct vl_idct *idct)
 {
+   idct->pipe->screen->tex_surface_destroy(idct->surfaces.destination);
+   idct->pipe->screen->tex_surface_destroy(idct->surfaces.intermediate);
+
    cleanup_shaders(idct);
    cleanup_buffers(idct);
+
+   cleanup_state(idct);
+
+   pipe_resource_reference(&idct->destination, NULL);
 }
 
 void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 {
+   struct vertex2f v;
+
    unsigned tex_pitch;
    short *texels;
+
    unsigned i;
 
    assert(idct);
    assert(block);
 
+   v.x = x;
+   v.y = y;
+
+   for (i = 0; i < 4; ++i) {
+      idct->vectors[idct->num_blocks * 4 + i] = v;
+   }
+
    tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format);
    texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
 
    for (i = 0; i < BLOCK_HEIGHT; ++i)
       memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
+
+   idct->num_blocks++;
 }
 
 void vl_idct_flush(struct vl_idct *idct)
 {
    xfer_buffers_unmap(idct);
-   // TODO
+
+   idct->pipe->set_constant_buffer(idct->pipe, PIPE_SHADER_VERTEX, 0, idct->vs_const_buf);
+
+#if 0
+   /* first stage */
+   idct->fb_state.cbufs[0] = idct->surfaces.intermediate;
+   idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
+   idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
+
+   idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
+   idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+   idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
+   idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
+   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+   idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
+
+   util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+
+   /* second stage */
+   idct->fb_state.cbufs[0] = idct->surfaces.destination;
+   idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
+   idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
+
+   idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
+   idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+   idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
+   idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
+   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+   idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
+
+   util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+
+   idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+#endif
+
+   idct->num_blocks = 0;
    xfer_buffers_map(idct);
 }
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 02a3250399f..6bb7f6bfbef 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -46,7 +46,7 @@ struct vl_idct
    {
       void *all[4];
       struct {
-         void *matrix, *transpose;
+         void *transpose, *matrix;
          void *source, *intermediate;
       } individual;
    } samplers;
@@ -55,7 +55,7 @@ struct vl_idct
    {
       struct pipe_sampler_view *all[4];
       struct {
-         struct pipe_sampler_view *matrix, *transpose;
+         struct pipe_sampler_view *transpose, *matrix;
          struct pipe_sampler_view *source, *intermediate;
       } individual;
    } sampler_views;
@@ -67,16 +67,28 @@ struct vl_idct
    {
       struct pipe_resource *all[4];
       struct {
-         struct pipe_resource *matrix, *transpose;
+         struct pipe_resource *transpose, *matrix;
          struct pipe_resource *source, *intermediate;
       } individual;
    } textures;
 
-   struct pipe_vertex_buffer quad;
-   struct pipe_vertex_buffer pos;
+   union
+   {
+      struct pipe_vertex_buffer all[2];
+      struct { struct pipe_vertex_buffer quad, pos; } individual;
+   } vertex_bufs;
+
+   unsigned num_blocks;
 
    struct pipe_transfer *tex_transfer;
    short *texels;
+
+   struct pipe_transfer *vec_transfer;
+   struct vertex2f *vectors;
+
+   struct {
+      struct pipe_surface *intermediate, *destination;
+   } surfaces;
 };
 
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 7bc7ba91f94..d08e09cab3f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -116,7 +116,7 @@ enum MACROBLOCK_TYPE
    NUM_MACROBLOCK_TYPES
 };
 
-/* vertices for four quads covering the blocks */
+/* vertices for a quad covering a macroblock */
 static const struct vertex2f const_quad[4] = {
    {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
 };
@@ -1174,6 +1174,10 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    gen_macroblock_stream(r, num_macroblocks);
 
+   r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, r->vs_const_buf);
+   r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
+   r->pipe->set_viewport_state(r->pipe, &r->viewport);
+
    if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
@@ -1304,13 +1308,7 @@ update_render_target(struct vl_mpeg12_mc_renderer *r)
 
    pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
 
-   r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0,
-                                r->vs_const_buf);
-
    r->fb_state.cbufs[0] = r->surface;
-
-   r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
-   r->pipe->set_viewport_state(r->pipe, &r->viewport);
 }
 
 static void
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 1be5b156d35..f166c2cfde9 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -348,6 +348,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
 		return V_0280A0_SWAP_STD_REV;
 
 	case PIPE_FORMAT_R16G16_UNORM:
+	case PIPE_FORMAT_R32_FLOAT:
 		return V_0280A0_SWAP_STD;
 
 		/* 64-bit buffers. */
-- 
cgit v1.2.3


From 749504a935f2468ea1f84a54e918233d77d90178 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 20 Nov 2010 21:06:42 +0100
Subject: r600g: add support for signed normalized frame buffers

---
 src/gallium/drivers/r600/r600_state.c         | 11 +++++++++++
 src/gallium/drivers/r600/r600_state_inlines.h |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 54cc79b1549..fa011612aeb 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -844,6 +844,17 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 	desc = util_format_description(rtex->resource.base.b.format);
 	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
 		ntype = V_0280A0_NUMBER_SRGB;
+        else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
+		switch(desc->channel[0].type) {
+		case UTIL_FORMAT_TYPE_UNSIGNED:
+			ntype = V_0280A0_NUMBER_UNORM;
+			break;
+
+		case UTIL_FORMAT_TYPE_SIGNED:
+			ntype = V_0280A0_NUMBER_SNORM;
+			break;
+		}
+	}
 
 	format = r600_translate_colorformat(rtex->resource.base.b.format);
 	swap = r600_translate_colorswap(rtex->resource.base.b.format);
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index f166c2cfde9..cca98e1d313 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -308,6 +308,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
 		return V_0280A0_SWAP_STD;
 
 	case PIPE_FORMAT_R16_UNORM:
+	case PIPE_FORMAT_R16_SNORM:
 		return V_0280A0_SWAP_STD;
 
 		/* 32-bit buffers. */
@@ -400,6 +401,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
 		return V_0280A0_COLOR_8_8;
 
 	case PIPE_FORMAT_R16_UNORM:
+	case PIPE_FORMAT_R16_SNORM:
 		return V_0280A0_COLOR_16;
 
 		/* 32-bit buffers. */
-- 
cgit v1.2.3


From 03c5a0ea5cd5b3e5931d6784749f87789a016b98 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 20 Nov 2010 21:08:12 +0100
Subject: [g3dvl] enable stage 1&2 buffers in idct code

---
 src/gallium/auxiliary/vl/vl_idct.c               | 128 ++++++++++++-----------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  35 +++----
 2 files changed, 83 insertions(+), 80 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 55e0751891a..9f81e0b9e2f 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -39,7 +39,7 @@
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
 #define SCALE_FACTOR_16_TO_12 (32768.0f / 2048.0f)
-#define SCALE_FACTOR_9_TO_16 (256.0f / 32768.0f)
+#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
 struct vertex_shader_consts
 {
@@ -217,13 +217,15 @@ create_transpose_frag_shader(struct vl_idct *idct)
    struct ureg_program *shader;
    struct ureg_src tc[2], sampler[2];
    struct ureg_src start[2], step[2];
-   struct ureg_dst fragment;
+   struct ureg_dst tmp, fragment;
    float scale[2];
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return NULL;
 
+   tmp = ureg_DECL_temporary(shader);
+
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
 
@@ -244,6 +246,9 @@ create_transpose_frag_shader(struct vl_idct *idct)
    //matrix_mul(shader, fragment, tc, sampler, start, step, scale);
    //ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
 
+   ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[1], sampler[1]);
+   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, SCALE_FACTOR_16_TO_9));
+
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, idct->pipe);
@@ -262,8 +267,6 @@ create_matrix_frag_shader(struct vl_idct *idct)
    if (!shader)
       return NULL;
 
-   tmp = ureg_DECL_temporary(shader);
-
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
 
@@ -281,10 +284,9 @@ create_matrix_frag_shader(struct vl_idct *idct)
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-//   matrix_mul(shader, tmp, tc, sampler, start, step, scale);
-//   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_9_TO_16), TGSI_SWIZZLE_X));
+   //matrix_mul(shader, tmp, tc, sampler, start, step, scale);
+
    ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc[0], sampler[0]);
-   //ureg_MUL(shader, , ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, 1.0f));
 
    ureg_END(shader);
 
@@ -304,11 +306,7 @@ xfer_buffers_map(struct vl_idct *idct)
 
    idct->tex_transfer = idct->pipe->get_transfer
    (
-#if 0
-      idct->pipe, idct->textures.individual.intermediate,
-#else
-      idct->pipe, idct->destination,
-#endif
+      idct->pipe, idct->textures.individual.source,
       u_subresource(0, 0),
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
@@ -391,8 +389,7 @@ init_buffers(struct vl_idct *idct)
    template.usage = PIPE_USAGE_DYNAMIC;
    idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
-   //template.format = PIPE_FORMAT_R32_FLOAT;
-   //template.usage = PIPE_USAGE_STATIC;
+   template.usage = PIPE_USAGE_STATIC;
    idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
    for (i = 0; i < 4; ++i) {
@@ -575,7 +572,8 @@ cleanup_state(struct vl_idct *idct)
       idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
 }
 
-bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst)
+bool
+vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst)
 {
    assert(idct && pipe && dst);
 
@@ -606,7 +604,8 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_r
    return true;
 }
 
-void vl_idct_cleanup(struct vl_idct *idct)
+void
+vl_idct_cleanup(struct vl_idct *idct)
 {
    idct->pipe->screen->tex_surface_destroy(idct->surfaces.destination);
    idct->pipe->screen->tex_surface_destroy(idct->surfaces.intermediate);
@@ -619,7 +618,8 @@ void vl_idct_cleanup(struct vl_idct *idct)
    pipe_resource_reference(&idct->destination, NULL);
 }
 
-void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
+void
+vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 {
    struct vertex2f v;
 
@@ -629,61 +629,69 @@ void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *bloc
    unsigned i;
 
    assert(idct);
-   assert(block);
-
-   v.x = x;
-   v.y = y;
-
-   for (i = 0; i < 4; ++i) {
-      idct->vectors[idct->num_blocks * 4 + i] = v;
-   }
 
    tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format);
    texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
 
-   for (i = 0; i < BLOCK_HEIGHT; ++i)
-      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
+   if(block) {
+      v.x = x;
+      v.y = y;
+
+      for (i = 0; i < 4; ++i) {
+         idct->vectors[idct->num_blocks * 4 + i] = v;
+      }
+
+      for (i = 0; i < BLOCK_HEIGHT; ++i)
+         memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
 
-   idct->num_blocks++;
+      idct->num_blocks++;
+   } else {
+      for (i = 0; i < BLOCK_HEIGHT; ++i)
+         memset(texels + i * tex_pitch, 0, BLOCK_WIDTH * 2);      
+   }
 }
 
-void vl_idct_flush(struct vl_idct *idct)
+void
+vl_idct_flush(struct vl_idct *idct)
 {
    xfer_buffers_unmap(idct);
 
    idct->pipe->set_constant_buffer(idct->pipe, PIPE_SHADER_VERTEX, 0, idct->vs_const_buf);
 
-#if 0
-   /* first stage */
-   idct->fb_state.cbufs[0] = idct->surfaces.intermediate;
-   idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
-   idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
-
-   idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
-   idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-   idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
-   idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
-   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
-   idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
-
-   util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
-
-   /* second stage */
-   idct->fb_state.cbufs[0] = idct->surfaces.destination;
-   idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
-   idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
-
-   idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
-   idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-   idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
-   idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
-   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
-   idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
-
-   util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
-
-   idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
-#endif
+   if(idct->num_blocks > 0) {
+
+      /* first stage */
+      idct->fb_state.cbufs[0] = idct->surfaces.intermediate;
+      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
+      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
+
+      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
+      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+      idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
+      idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
+      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
+
+      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+
+      idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+      /* second stage */
+      idct->fb_state.cbufs[0] = idct->surfaces.destination;
+      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
+      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
+
+      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
+      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+      idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
+      idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
+      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
+
+      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+
+      idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+   }
 
    idct->num_blocks = 0;
    xfer_buffers_map(idct);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index d08e09cab3f..fd501937179 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -42,7 +42,6 @@
 #define MACROBLOCK_HEIGHT 16
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
-#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
 struct vertex_shader_consts
 {
@@ -372,9 +371,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
     */
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
-   ureg_MAD(shader, fragment, ureg_src(texel), 
-            ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), 
-            ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -410,7 +407,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
    ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc, sampler);
-   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -459,7 +456,7 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[0], sampler);
    ureg_ENDIF(shader);
 
-   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -504,7 +501,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
    ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
 
-   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref[0]));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -561,7 +558,7 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
 
-   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref[0]));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -1315,7 +1312,7 @@ static void
 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
             enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
 {
-   unsigned tb = 0, sb = 0;
+   unsigned tb = 0;
    unsigned x, y;
 
    assert(r);
@@ -1323,10 +1320,9 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
 
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
-         if ((cbp >> (5 - tb)) & 1) {
-            vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT);
-            ++sb;
-         }
+         bool eb = !(cbp  & (1 << (5 - tb)));
+         vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, eb ? NULL : blocks);
+         blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT;
       }
    }
 
@@ -1334,13 +1330,12 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
    assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
    for (tb = 0; tb < 2; ++tb) {
-      if ((cbp >> (1 - tb)) & 1) {
-         if(tb == 0)
-            vl_idct_add_block(&r->idct_cb, mbx, mby, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT);
-         else
-            vl_idct_add_block(&r->idct_cr, mbx, mby, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT);
-         ++sb;
-      }
+      bool eb = !(cbp & (1 << (1 - tb)));
+      if(tb == 0)
+         vl_idct_add_block(&r->idct_cb, mbx, mby, eb ? NULL : blocks);
+      else
+         vl_idct_add_block(&r->idct_cr, mbx, mby, eb ? NULL : blocks);
+      blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT;
    }
 }
 
-- 
cgit v1.2.3


From 95febb69cc333dad75c0f2da19dd85f444281ad2 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 20 Nov 2010 22:24:42 +0100
Subject: [g3dvl] move empty block handling into idct code

---
 src/gallium/auxiliary/vl/vl_idct.c               |  91 ++++++++++++----
 src/gallium/auxiliary/vl/vl_idct.h               |   5 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 129 ++++-------------------
 3 files changed, 92 insertions(+), 133 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 9f81e0b9e2f..51988b30ddf 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -260,7 +260,7 @@ create_matrix_frag_shader(struct vl_idct *idct)
    struct ureg_program *shader;
    struct ureg_src tc[2], sampler[2];
    struct ureg_src start[2], step[2];
-   struct ureg_dst tmp, fragment;
+   struct ureg_dst fragment;
    float scale[2];
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
@@ -293,6 +293,25 @@ create_matrix_frag_shader(struct vl_idct *idct)
    return ureg_create_shader_and_destroy(shader, idct->pipe);
 }
 
+static void *
+create_empty_block_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_dst fragment;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
 static void
 xfer_buffers_map(struct vl_idct *idct)
 {
@@ -340,6 +359,7 @@ init_shaders(struct vl_idct *idct)
    assert(idct->vs = create_vert_shader(idct));
    assert(idct->transpose_fs = create_transpose_frag_shader(idct));
    assert(idct->matrix_fs = create_matrix_frag_shader(idct));
+   assert(idct->eb_fs = create_empty_block_frag_shader(idct));
 
    return true;
 }
@@ -352,6 +372,7 @@ cleanup_shaders(struct vl_idct *idct)
    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
    idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
+   idct->pipe->delete_fs_state(idct->pipe, idct->eb_fs);
 }
 
 static bool
@@ -361,7 +382,7 @@ init_buffers(struct vl_idct *idct)
    struct pipe_sampler_view sampler_view;
    struct pipe_vertex_element vertex_elems[2];
 
-   const unsigned max_blocks =
+   idct->max_blocks =
       align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
       align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
       idct->destination->depth0;
@@ -398,23 +419,23 @@ init_buffers(struct vl_idct *idct)
    }
 
    idct->vertex_bufs.individual.quad.stride = sizeof(struct vertex2f);
-   idct->vertex_bufs.individual.quad.max_index = 4 * max_blocks - 1;
+   idct->vertex_bufs.individual.quad.max_index = 4 * idct->max_blocks - 1;
    idct->vertex_bufs.individual.quad.buffer_offset = 0;
    idct->vertex_bufs.individual.quad.buffer = pipe_buffer_create
    (
       idct->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex2f) * 4 * max_blocks
+      sizeof(struct vertex2f) * 4 * idct->max_blocks
    );
 
    idct->vertex_bufs.individual.pos.stride = sizeof(struct vertex2f);
-   idct->vertex_bufs.individual.pos.max_index = 4 * max_blocks - 1;
+   idct->vertex_bufs.individual.pos.max_index = 4 * idct->max_blocks - 1;
    idct->vertex_bufs.individual.pos.buffer_offset = 0;
    idct->vertex_bufs.individual.pos.buffer = pipe_buffer_create
    (
       idct->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex2f) * 4 * max_blocks
+      sizeof(struct vertex2f) * 4 * idct->max_blocks
    );
 
    /* Rect element */
@@ -477,8 +498,8 @@ init_constants(struct vl_idct *idct)
       &buf_transfer
    );
 
-   for ( i = 0; i <= idct->vertex_bufs.individual.quad.max_index; i += 4)
-     memcpy(v + i, &const_quad, sizeof(const_quad));
+   for ( i = 0; i < idct->max_blocks; ++i)
+     memcpy(v + i * 4, &const_quad, sizeof(const_quad));
 
    pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
 
@@ -527,6 +548,7 @@ init_state(struct vl_idct *idct)
    unsigned i;
 
    idct->num_blocks = 0;
+   idct->num_empty_blocks = 0;
 
    idct->viewport.scale[0] = idct->destination->width0;
    idct->viewport.scale[1] = idct->destination->height0;
@@ -621,7 +643,7 @@ vl_idct_cleanup(struct vl_idct *idct)
 void
 vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 {
-   struct vertex2f v;
+   struct vertex2f v, *v_dst;
 
    unsigned tex_pitch;
    short *texels;
@@ -630,24 +652,31 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 
    assert(idct);
 
-   tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format);
-   texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
-
    if(block) {
-      v.x = x;
-      v.y = y;
-
-      for (i = 0; i < 4; ++i) {
-         idct->vectors[idct->num_blocks * 4 + i] = v;
-      }
+      tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format);
+      texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
 
       for (i = 0; i < BLOCK_HEIGHT; ++i)
          memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
 
+      /* non empty blocks fills the vector buffer from left to right */
+      v_dst = idct->vectors + idct->num_blocks * 4;
+
       idct->num_blocks++;
+
    } else {
-      for (i = 0; i < BLOCK_HEIGHT; ++i)
-         memset(texels + i * tex_pitch, 0, BLOCK_WIDTH * 2);      
+
+      /* while empty blocks fills the vector buffer from right to left */
+      v_dst = idct->vectors + (idct->max_blocks - idct->num_empty_blocks) * 4 - 4;
+
+      idct->num_empty_blocks++;
+   }
+
+   v.x = x;
+   v.y = y;
+
+   for (i = 0; i < 4; ++i) {
+      v_dst[i] = v;
    }
 }
 
@@ -689,10 +718,30 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
 
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+   }
 
-      idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+   if(idct->num_empty_blocks > 0) {
+
+      /* empty block handling */
+      idct->fb_state.cbufs[0] = idct->surfaces.destination;
+      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
+      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
+
+      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
+      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+      idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
+      idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
+      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+      idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);
+
+      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS,
+         (idct->max_blocks - idct->num_empty_blocks) * 4,
+         idct->num_empty_blocks * 4);
    }
 
+   idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
    idct->num_blocks = 0;
+   idct->num_empty_blocks = 0;
    xfer_buffers_map(idct);
 }
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 6bb7f6bfbef..84ba5288aff 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -34,6 +34,8 @@ struct vl_idct
 {
    struct pipe_context *pipe;
 
+   unsigned max_blocks;
+
    struct pipe_viewport_state viewport;
    struct pipe_resource *vs_const_buf;
    struct pipe_framebuffer_state fb_state;
@@ -61,7 +63,7 @@ struct vl_idct
    } sampler_views;
 
    void *vs;
-   void *transpose_fs, *matrix_fs;
+   void *transpose_fs, *matrix_fs, *eb_fs;
 
    union
    {
@@ -79,6 +81,7 @@ struct vl_idct
    } vertex_bufs;
 
    unsigned num_blocks;
+   unsigned num_empty_blocks;
 
    struct pipe_transfer *tex_transfer;
    short *texels;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index fd501937179..15def609348 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -57,13 +57,6 @@ struct fragment_shader_consts
 struct vert_stream_0
 {
    struct vertex2f pos;
-
-   struct {
-      float luma_eb;
-      float cb_eb;
-      float cr_eb;
-   } field[2][2];
-
    float interlaced;
 };
 
@@ -71,10 +64,6 @@ enum VS_INPUT
 {
    VS_I_RECT,
    VS_I_VPOS,
-   VS_I_EB_0_0,
-   VS_I_EB_0_1,
-   VS_I_EB_1_0,
-   VS_I_EB_1_1,
    VS_I_INTERLACED,
    VS_I_MV0,
    VS_I_MV1,
@@ -91,10 +80,6 @@ enum VS_OUTPUT
    VS_O_TEX0,
    VS_O_TEX1,
    VS_O_TEX2,
-   VS_O_EB_0_0,
-   VS_O_EB_0_1,
-   VS_O_EB_1_0,
-   VS_O_EB_1_1,
    VS_O_INTERLACED,
    VS_O_MV0,
    VS_O_MV1,
@@ -125,9 +110,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 {
    struct ureg_program *shader;
    struct ureg_src norm, mbs;
-   struct ureg_src vrect, vpos, eb[2][2], interlaced, vmv[4];
+   struct ureg_src vrect, vpos, interlaced, vmv[4];
    struct ureg_dst scale, t_vpos, t_vtex;
-   struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2][2], o_interlaced, o_vmv[4];
+   struct ureg_dst o_vpos, o_line, o_vtex[3], o_interlaced, o_vmv[4];
    unsigned i, j, count, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -143,10 +128,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
-   eb[0][0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0);
-   eb[1][0] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
-   eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
-   eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
    interlaced = ureg_DECL_vs_input(shader, VS_I_INTERLACED);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
@@ -154,10 +135,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
    o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
    o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
-   o_eb[0][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_0);
-   o_eb[0][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_1);
-   o_eb[1][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_0);
-   o_eb[1][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_1);
    o_interlaced = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED);
    
    count=0;
@@ -230,11 +207,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_ENDIF(shader);
    ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
-   ureg_MOV(shader, o_eb[0][0], eb[0][0]);
-   ureg_MOV(shader, o_eb[0][1], eb[0][1]);
-   ureg_MOV(shader, o_eb[1][0], eb[1][0]);
-   ureg_MOV(shader, o_eb[1][1], eb[1][1]);
-
    ureg_MOV(shader, o_interlaced, interlaced);
 
    if(count > 0) {
@@ -266,11 +238,9 @@ calc_field(struct ureg_program *shader)
     *
     * tmp.z = fraction(line.y)
     * tmp.z = tmp.z >= 0.5 ? 1 : 0
-    * tmp.xy = line > 4 ? 1 : 0
     */
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(line, TGSI_SWIZZLE_Y));
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line, ureg_imm2f(shader, BLOCK_WIDTH / 2, BLOCK_HEIGHT / 2));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
 
    return tmp;
 }
@@ -278,25 +248,19 @@ calc_field(struct ureg_program *shader)
 static struct ureg_dst
 fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src tc[3], eb[2][2], interlaced;
+   struct ureg_src tc[3], interlaced;
    struct ureg_src sampler[3];
-   struct ureg_dst texel, t_tc, t_field, tmp;
-   unsigned i, l_interlaced, l_y, l_x;
+   struct ureg_dst texel, t_tc, tmp;
+   unsigned i, label;
 
    texel = ureg_DECL_temporary(shader);
    t_tc = ureg_DECL_temporary(shader);
-   t_field = ureg_DECL_temporary(shader);
    tmp = ureg_DECL_temporary(shader);
 
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
    tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
 
-   eb[0][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_0, TGSI_INTERPOLATE_CONSTANT);
-   eb[0][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_1, TGSI_INTERPOLATE_CONSTANT);
-   eb[1][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_0, TGSI_INTERPOLATE_CONSTANT);
-   eb[1][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_1, TGSI_INTERPOLATE_CONSTANT);
-
    interlaced = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED, TGSI_INTERPOLATE_CONSTANT);
 
    for (i = 0; i < 3; ++i)  {
@@ -308,36 +272,19 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
     * texel.cb = tex(tc[2], sampler[1])
     * texel.cr = tex(tc[2], sampler[2])
     */
-   ureg_MOV(shader, ureg_writemask(t_field, TGSI_WRITEMASK_XY), ureg_src(field));
-   ureg_IF(shader, interlaced, &l_interlaced);
-      ureg_MOV(shader, ureg_writemask(t_field, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z));
-   ureg_ENDIF(shader);
+   ureg_MUL(shader, tmp, interlaced, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y));
 
    for (i = 0; i < 3; ++i) {
       if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
-         ureg_IF(shader, ureg_scalar(ureg_src(t_field), TGSI_SWIZZLE_Y), &l_y);
-            ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[1]);
-
-            ureg_IF(shader, ureg_scalar(ureg_src(t_field), TGSI_SWIZZLE_X), &l_x);
-               ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[1][1], TGSI_SWIZZLE_X + i));
-            ureg_ELSE(shader, &l_x);
-               ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[1][0], TGSI_SWIZZLE_X + i));
-            ureg_ENDIF(shader);
 
-         ureg_ELSE(shader, &l_y);
+         ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), &label);
+            ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[1]);
+         ureg_ELSE(shader, &label);
             ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[0]);
-
-            ureg_IF(shader, ureg_scalar(ureg_src(t_field), TGSI_SWIZZLE_X), &l_x);
-               ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[0][1], TGSI_SWIZZLE_X + i));
-            ureg_ELSE(shader, &l_x);
-               ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[0][0], TGSI_SWIZZLE_X + i));
-            ureg_ENDIF(shader);
-
          ureg_ENDIF(shader);
 
       } else {
          ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[2]);
-         ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(eb[0][0], TGSI_SWIZZLE_X + i));
       }
 
       /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
@@ -345,7 +292,6 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
       ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
    }
 
-   ureg_release_temporary(shader, t_field);
    ureg_release_temporary(shader, t_tc);
    ureg_release_temporary(shader, tmp);
 
@@ -450,7 +396,7 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
 
-   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z), &label);
+   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[1], sampler);
    ureg_ELSE(shader, &label);
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[0], sampler);
@@ -548,7 +494,7 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
 
-   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z), &label);
+   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
       ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
       ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[3], sampler[1]);
    ureg_ELSE(shader, &label);
@@ -708,7 +654,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
 
    memset(&template, 0, sizeof(struct pipe_resource));
-   template.target = PIPE_TEXTURE_3D;
+   template.target = PIPE_TEXTURE_2D;
    /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
    template.format = PIPE_FORMAT_R16_SNORM;
    template.last_level = 0;
@@ -796,32 +742,8 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[VS_I_VPOS].vertex_buffer_index = 1;
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* y, cr, cb z-coordinate element top left block */
-   vertex_elems[VS_I_EB_0_0].src_offset = sizeof(struct vertex2f);
-   vertex_elems[VS_I_EB_0_0].instance_divisor = 0;
-   vertex_elems[VS_I_EB_0_0].vertex_buffer_index = 1;
-   vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
-
-   /* y, cr, cb z-coordinate element top right block */
-   vertex_elems[VS_I_EB_0_1].src_offset = sizeof(struct vertex2f) + sizeof(float) * 3;
-   vertex_elems[VS_I_EB_0_1].instance_divisor = 0;
-   vertex_elems[VS_I_EB_0_1].vertex_buffer_index = 1;
-   vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
-
-   /* y, cr, cb z-coordinate element bottom left block */
-   vertex_elems[VS_I_EB_1_0].src_offset = sizeof(struct vertex2f) + sizeof(float) * 6;
-   vertex_elems[VS_I_EB_1_0].instance_divisor = 0;
-   vertex_elems[VS_I_EB_1_0].vertex_buffer_index = 1;
-   vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
-
-   /* y, cr, cb z-coordinate element bottom right block */
-   vertex_elems[VS_I_EB_1_1].src_offset = sizeof(struct vertex2f) + sizeof(float) * 9;
-   vertex_elems[VS_I_EB_1_1].instance_divisor = 0;
-   vertex_elems[VS_I_EB_1_1].vertex_buffer_index = 1;
-   vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
-
    /* progressive=1.0f interlaced=0.0f */
-   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(struct vertex2f) + sizeof(float) * 12;
+   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(struct vertex2f);
    vertex_elems[VS_I_INTERLACED].instance_divisor = 0;
    vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 1;
    vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
@@ -850,9 +772,9 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[VS_I_MV3].vertex_buffer_index = 3;
    vertex_elems[VS_I_MV3].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
-   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 9, vertex_elems);
-   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 11, vertex_elems);
+   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 3, vertex_elems);
+   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 5, vertex_elems);
+   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
 
    r->vs_const_buf = pipe_buffer_create
    (
@@ -1048,21 +970,6 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
          v.pos.x = mb->mbx;
          v.pos.y = mb->mby;
 
-         v.field[0][0].luma_eb = mb->cbp & 32 ? 0.0f : -1.0f;
-         v.field[0][1].luma_eb = mb->cbp & 16 ? 0.0f : -1.0f;
-         v.field[1][0].luma_eb = mb->cbp & 8 ? 0.0f : -1.0f;
-         v.field[1][1].luma_eb = mb->cbp & 4 ? 0.0f : -1.0f;
-
-         v.field[0][0].cb_eb = mb->cbp & 2 ? 0.0f : -1.0f;
-         v.field[0][1].cb_eb = mb->cbp & 2 ? 0.0f : -1.0f;
-         v.field[1][0].cb_eb = mb->cbp & 2 ? 0.0f : -1.0f;
-         v.field[1][1].cb_eb = mb->cbp & 2 ? 0.0f : -1.0f;
-
-         v.field[0][0].cr_eb = mb->cbp & 1 ? 0.0f : -1.0f;
-         v.field[0][1].cr_eb = mb->cbp & 1 ? 0.0f : -1.0f;
-         v.field[1][0].cr_eb = mb->cbp & 1 ? 0.0f : -1.0f;
-         v.field[1][1].cr_eb = mb->cbp & 1 ? 0.0f : -1.0f;
-   
          v.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
 
          for ( i = 0; i < 4; ++i )
-- 
cgit v1.2.3


From cc998ddf929d5b6caea9f3d4b31c92aed7c55d96 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 21 Nov 2010 13:57:02 +0100
Subject: r600g: remove accidentally added r32 float format

---
 src/gallium/drivers/r600/r600_state_inlines.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index cca98e1d313..a78834ecf43 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -349,7 +349,6 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
 		return V_0280A0_SWAP_STD_REV;
 
 	case PIPE_FORMAT_R16G16_UNORM:
-	case PIPE_FORMAT_R32_FLOAT:
 		return V_0280A0_SWAP_STD;
 
 		/* 64-bit buffers. */
-- 
cgit v1.2.3


From 331eb58d1532303770e3cae2ba5ed4c39a159881 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 21 Nov 2010 13:58:23 +0100
Subject: r600g: disable staging upload for now

---
 src/gallium/drivers/r600/r600_texture.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index e719f7fb983..a5ac75736f0 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -546,8 +546,8 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
                        PIPE_TRANSFER_UNSYNCHRONIZED)))
                 use_staging_texture = TRUE;
 
-        if (!permit_hardware_blit(ctx->screen, texture) ||
-            (texture->flags & R600_RESOURCE_FLAG_TRANSFER))
+        /*if (!permit_hardware_blit(ctx->screen, texture) ||
+            (texture->flags & R600_RESOURCE_FLAG_TRANSFER))*/
                 use_staging_texture = FALSE;
 
 	trans = CALLOC_STRUCT(r600_transfer);
-- 
cgit v1.2.3


From ac1fd50163119a887487d748fab507b23e215c2b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 21 Nov 2010 14:19:40 +0100
Subject: [g3dvl] first working version of idct code

---
 src/gallium/auxiliary/vl/vl_idct.c | 88 ++++++++++++++++++++------------------
 1 file changed, 46 insertions(+), 42 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 51988b30ddf..bc00ad79378 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -38,7 +38,7 @@
 
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
-#define SCALE_FACTOR_16_TO_12 (32768.0f / 2048.0f)
+
 #define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
 struct vertex_shader_consts
@@ -63,7 +63,6 @@ enum VS_OUTPUT
    VS_O_STEP
 };
 
-
 static const float const_matrix[8][8] = {
    {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.353553f,  0.3535530f },
    {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
@@ -75,17 +74,6 @@ static const float const_matrix[8][8] = {
    {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f, -0.0975458f }
 };
 
-static const float const_transpose[8][8] = {
-   {  0.3535530f,  0.4903930f,  0.4619400f,  0.4157350f,  0.3535530f,  0.2777850f,  0.191342f,  0.0975451f },
-   {  0.3535530f,  0.4157350f,  0.1913420f, -0.0975452f, -0.3535530f, -0.4903930f, -0.461940f, -0.2777850f },
-   {  0.3535530f,  0.2777850f, -0.1913420f, -0.4903930f, -0.3535530f,  0.0975452f,  0.461940f,  0.4157350f },
-   {  0.3535530f,  0.0975451f, -0.4619400f, -0.2777850f,  0.3535540f,  0.4157350f, -0.191342f, -0.4903930f },
-   {  0.3535530f, -0.0975452f, -0.4619400f,  0.2777850f,  0.3535530f, -0.4157350f, -0.191341f,  0.4903930f },
-   {  0.3535530f, -0.2777850f, -0.1913420f,  0.4903930f, -0.3535540f, -0.0975451f,  0.461940f, -0.4157350f },
-   {  0.3535530f, -0.4157350f,  0.1913420f,  0.0975450f, -0.3535530f,  0.4903930f, -0.461940f,  0.2777860f },
-   {  0.3535530f, -0.4903930f,  0.4619400f, -0.4157350f,  0.3535530f, -0.2777850f,  0.191342f, -0.0975458f }
-};
-
 /* vertices for a quad covering a block */
 static const struct vertex2f const_quad[4] = {
    {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
@@ -217,15 +205,13 @@ create_transpose_frag_shader(struct vl_idct *idct)
    struct ureg_program *shader;
    struct ureg_src tc[2], sampler[2];
    struct ureg_src start[2], step[2];
-   struct ureg_dst tmp, fragment;
+   struct ureg_dst fragment;
    float scale[2];
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return NULL;
 
-   tmp = ureg_DECL_temporary(shader);
-
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
 
@@ -239,15 +225,11 @@ create_transpose_frag_shader(struct vl_idct *idct)
    sampler[1] = ureg_DECL_sampler(shader, 2);
 
    scale[0] = 1.0f;
-   scale[1] = SCALE_FACTOR_16_TO_12;
+   scale[1] = SCALE_FACTOR_16_TO_9;
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   //matrix_mul(shader, fragment, tc, sampler, start, step, scale);
-   //ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
-
-   ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[1], sampler[1]);
-   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, SCALE_FACTOR_16_TO_9));
+   matrix_mul(shader, fragment, tc, sampler, start, step, scale);
 
    ureg_END(shader);
 
@@ -284,9 +266,7 @@ create_matrix_frag_shader(struct vl_idct *idct)
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   //matrix_mul(shader, tmp, tc, sampler, start, step, scale);
-
-   ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc[0], sampler[0]);
+   matrix_mul(shader, fragment, tc, sampler, start, step, scale);
 
    ureg_END(shader);
 
@@ -325,7 +305,11 @@ xfer_buffers_map(struct vl_idct *idct)
 
    idct->tex_transfer = idct->pipe->get_transfer
    (
+#if 1
       idct->pipe, idct->textures.individual.source,
+#else
+      idct->pipe, idct->destination,
+#endif
       u_subresource(0, 0),
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
@@ -391,7 +375,7 @@ init_buffers(struct vl_idct *idct)
 
    memset(&template, 0, sizeof(struct pipe_resource));
    template.target = PIPE_TEXTURE_2D;
-   template.format = PIPE_FORMAT_R32_FLOAT;
+   template.format = PIPE_FORMAT_R16_SNORM;
    template.last_level = 0;
    template.width0 = 8;
    template.height0 = 8;
@@ -487,9 +471,19 @@ init_constants(struct vl_idct *idct)
    struct pipe_transfer *buf_transfer;
    struct vertex_shader_consts *vs_consts;
    struct vertex2f *v;
+   short *s;
 
-   unsigned i;
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      BLOCK_WIDTH,
+      BLOCK_HEIGHT,
+      1
+   };
 
+   unsigned i, j, pitch;
+
+   /* quad vectors */
    v = pipe_buffer_map
    (
       idct->pipe,
@@ -497,37 +491,47 @@ init_constants(struct vl_idct *idct)
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &buf_transfer
    );
-
    for ( i = 0; i < idct->max_blocks; ++i)
      memcpy(v + i * 4, &const_quad, sizeof(const_quad));
-
    pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
 
-
-   v = pipe_buffer_map
+   /* transposed matrix */
+   buf_transfer = idct->pipe->get_transfer
    (
-      idct->pipe,
-      idct->textures.individual.matrix,
+      idct->pipe, idct->textures.individual.transpose,
+      u_subresource(0, 0),
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer
+      &rect
    );
+   pitch = buf_transfer->stride / util_format_get_blocksize(buf_transfer->resource->format);
 
-   memcpy(v, &const_matrix, sizeof(const_matrix));
+   s = idct->pipe->transfer_map(idct->pipe, buf_transfer);
+   for(i = 0; i < BLOCK_HEIGHT; ++i)
+      for(j = 0; j < BLOCK_WIDTH; ++j)
+         s[i * pitch + j] = const_matrix[j][i] * (1 << 15); // transpose
 
-   pipe_buffer_unmap(idct->pipe, idct->textures.individual.matrix, buf_transfer);
+   idct->pipe->transfer_unmap(idct->pipe, buf_transfer);
+   idct->pipe->transfer_destroy(idct->pipe, buf_transfer);
 
-   v = pipe_buffer_map
+   /* matrix */
+   buf_transfer = idct->pipe->get_transfer
    (
-      idct->pipe,
-      idct->textures.individual.transpose,
+      idct->pipe, idct->textures.individual.matrix,
+      u_subresource(0, 0),
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer
+      &rect
    );
+   pitch = buf_transfer->stride / util_format_get_blocksize(buf_transfer->resource->format);
 
-   memcpy(v, &const_transpose, sizeof(const_transpose));
+   s = idct->pipe->transfer_map(idct->pipe, buf_transfer);
+   for(i = 0; i < BLOCK_HEIGHT; ++i)
+      for(j = 0; j < BLOCK_WIDTH; ++j)
+         s[i * pitch + j] = const_matrix[i][j] * (1 << 15);
 
-   pipe_buffer_unmap(idct->pipe, idct->textures.individual.transpose, buf_transfer);
+   idct->pipe->transfer_unmap(idct->pipe, buf_transfer);
+   idct->pipe->transfer_destroy(idct->pipe, buf_transfer);
 
+   /* normalisation constants */
    vs_consts = pipe_buffer_map
    (
       idct->pipe, idct->vs_const_buf,
-- 
cgit v1.2.3


From 42c7291d2cb50c2bd94dd9346a8402a24303d66d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 21 Nov 2010 14:34:38 +0100
Subject: [g3dvl] inverse check for iDCT

---
 src/gallium/state_trackers/xorg/xvmc/context.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index 4869aac3a2e..06a1633288b 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -209,8 +209,8 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Unsupported chroma format.\n");
       return BadImplementation;
    }
-   if (mc_type != (XVMC_MOCOMP | XVMC_MPEG_2)) {
-      XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Non-MPEG2/Mocomp acceleration unsupported.\n");
+   if (mc_type != (XVMC_IDCT | XVMC_MOCOMP | XVMC_MPEG_2)) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Non-MPEG2/Mocomp/iDCT acceleration unsupported.\n");
       return BadImplementation;
    }
    if (surface_flags & XVMC_INTRA_UNSIGNED) {
-- 
cgit v1.2.3


From 21efda86875096333dc0412c0edab1e188f551d8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 23 Nov 2010 00:19:02 +0100
Subject: [g3dvl] add some error handling

---
 src/gallium/auxiliary/vl/vl_idct.c               | 25 ++++++---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 65 ++++++++++++++----------
 2 files changed, 55 insertions(+), 35 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index bc00ad79378..edc100d4a29 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -338,21 +338,21 @@ xfer_buffers_unmap(struct vl_idct *idct)
 static bool
 init_shaders(struct vl_idct *idct)
 {
-   assert(idct);
-
    assert(idct->vs = create_vert_shader(idct));
    assert(idct->transpose_fs = create_transpose_frag_shader(idct));
    assert(idct->matrix_fs = create_matrix_frag_shader(idct));
    assert(idct->eb_fs = create_empty_block_frag_shader(idct));
 
-   return true;
+   return 
+      idct->vs != NULL &&
+      idct->transpose_fs != NULL &&
+      idct->matrix_fs != NULL &&
+      idct->eb_fs != NULL;
 }
 
 static void
 cleanup_shaders(struct vl_idct *idct)
 {
-   assert(idct);
-
    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
    idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
@@ -365,14 +365,13 @@ init_buffers(struct vl_idct *idct)
    struct pipe_resource template;
    struct pipe_sampler_view sampler_view;
    struct pipe_vertex_element vertex_elems[2];
+   unsigned i;
 
    idct->max_blocks =
       align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
       align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
       idct->destination->depth0;
 
-   unsigned i;
-
    memset(&template, 0, sizeof(struct pipe_resource));
    template.target = PIPE_TEXTURE_2D;
    template.format = PIPE_FORMAT_R16_SNORM;
@@ -398,6 +397,9 @@ init_buffers(struct vl_idct *idct)
    idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
    for (i = 0; i < 4; ++i) {
+      if(idct->textures.all[i] == NULL)
+         return false; /* a texture failed to allocate */
+
       u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format);
       idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
    }
@@ -412,6 +414,9 @@ init_buffers(struct vl_idct *idct)
       sizeof(struct vertex2f) * 4 * idct->max_blocks
    );
 
+   if(idct->vertex_bufs.individual.quad.buffer == NULL)
+      return false;
+
    idct->vertex_bufs.individual.pos.stride = sizeof(struct vertex2f);
    idct->vertex_bufs.individual.pos.max_index = 4 * idct->max_blocks - 1;
    idct->vertex_bufs.individual.pos.buffer_offset = 0;
@@ -422,6 +427,9 @@ init_buffers(struct vl_idct *idct)
       sizeof(struct vertex2f) * 4 * idct->max_blocks
    );
 
+   if(idct->vertex_bufs.individual.pos.buffer == NULL)
+      return false;
+
    /* Rect element */
    vertex_elems[0].src_offset = 0;
    vertex_elems[0].instance_divisor = 0;
@@ -443,6 +451,9 @@ init_buffers(struct vl_idct *idct)
       sizeof(struct vertex_shader_consts)
    );
 
+   if(idct->vs_const_buf == NULL)
+      return false;
+
    return true;
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 15def609348..32728413700 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -786,7 +786,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    return true;
 }
 
-static bool
+static void
 init_const_buffers(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_transfer *buf_transfer;
@@ -805,8 +805,6 @@ init_const_buffers(struct vl_mpeg12_mc_renderer *r)
      memcpy(rect + i * 4, &const_quad, sizeof(const_quad));
 
    pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.rect.buffer, buf_transfer);
-   
-   return true;
 }
 
 static void
@@ -1308,40 +1306,51 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (!renderer->texview_map)
       return false;
 
-   if (!init_pipe_state(renderer)) {
-      util_delete_keymap(renderer->texview_map, renderer->pipe);
-      return false;
-   }
-   if (!init_shaders(renderer)) {
-      util_delete_keymap(renderer->texview_map, renderer->pipe);
-      cleanup_pipe_state(renderer);
-      return false;
-   }
-   if (!init_buffers(renderer)) {
-      util_delete_keymap(renderer->texview_map, renderer->pipe);
-      cleanup_shaders(renderer);
-      cleanup_pipe_state(renderer);
-      return false;
-   }
+   if (!init_pipe_state(renderer))
+      goto error_pipe_state;
 
-   if (!init_const_buffers(renderer)) {
-      util_delete_keymap(renderer->texview_map, renderer->pipe);
-      cleanup_pipe_state(renderer);
-      cleanup_shaders(renderer);
-      cleanup_buffers(renderer);
-      return false;
-   }
+   if (!init_shaders(renderer))
+      goto error_shaders;
+
+   if (!init_buffers(renderer))
+      goto error_buffers;
+
+   init_const_buffers(renderer);
 
    renderer->surface = NULL;
    renderer->past = NULL;
    renderer->future = NULL;
    renderer->num_macroblocks = 0;
 
-   vl_idct_init(&renderer->idct_y, pipe, renderer->textures.individual.y);
-   vl_idct_init(&renderer->idct_cr, pipe, renderer->textures.individual.cr);
-   vl_idct_init(&renderer->idct_cb, pipe, renderer->textures.individual.cb);
+   if(!vl_idct_init(&renderer->idct_y, pipe, renderer->textures.individual.y))
+      goto error_idct_y;
+
+   if(!vl_idct_init(&renderer->idct_cr, pipe, renderer->textures.individual.cr))
+      goto error_idct_cr;
+
+   if(!vl_idct_init(&renderer->idct_cb, pipe, renderer->textures.individual.cb))
+      goto error_idct_cb;
 
    return true;
+
+error_idct_cb:
+   vl_idct_cleanup(&renderer->idct_cr);
+
+error_idct_cr:
+   vl_idct_cleanup(&renderer->idct_y);
+
+error_idct_y:
+   cleanup_buffers(renderer);
+
+error_buffers:
+   cleanup_shaders(renderer);
+
+error_shaders:
+   cleanup_pipe_state(renderer);
+
+error_pipe_state:
+   util_delete_keymap(renderer->texview_map, renderer->pipe);
+   return false;
 }
 
 void
-- 
cgit v1.2.3


From 58d04f816c00c6829975d0e797cb76eca3943e7d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 23 Nov 2010 21:26:26 +0100
Subject: [g3dvl] switch to r32 float for idct matrix

---
 src/gallium/auxiliary/vl/vl_idct.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index edc100d4a29..d6f065750f0 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -374,7 +374,7 @@ init_buffers(struct vl_idct *idct)
 
    memset(&template, 0, sizeof(struct pipe_resource));
    template.target = PIPE_TEXTURE_2D;
-   template.format = PIPE_FORMAT_R16_SNORM;
+   template.format = PIPE_FORMAT_R32_FLOAT;
    template.last_level = 0;
    template.width0 = 8;
    template.height0 = 8;
@@ -482,7 +482,7 @@ init_constants(struct vl_idct *idct)
    struct pipe_transfer *buf_transfer;
    struct vertex_shader_consts *vs_consts;
    struct vertex2f *v;
-   short *s;
+   float *f;
 
    struct pipe_box rect =
    {
@@ -516,10 +516,10 @@ init_constants(struct vl_idct *idct)
    );
    pitch = buf_transfer->stride / util_format_get_blocksize(buf_transfer->resource->format);
 
-   s = idct->pipe->transfer_map(idct->pipe, buf_transfer);
+   f = idct->pipe->transfer_map(idct->pipe, buf_transfer);
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
-         s[i * pitch + j] = const_matrix[j][i] * (1 << 15); // transpose
+         f[i * pitch + j] = const_matrix[j][i]; // transpose
 
    idct->pipe->transfer_unmap(idct->pipe, buf_transfer);
    idct->pipe->transfer_destroy(idct->pipe, buf_transfer);
@@ -534,10 +534,10 @@ init_constants(struct vl_idct *idct)
    );
    pitch = buf_transfer->stride / util_format_get_blocksize(buf_transfer->resource->format);
 
-   s = idct->pipe->transfer_map(idct->pipe, buf_transfer);
+   f = idct->pipe->transfer_map(idct->pipe, buf_transfer);
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
-         s[i * pitch + j] = const_matrix[i][j] * (1 << 15);
+         f[i * pitch + j] = const_matrix[i][j];
 
    idct->pipe->transfer_unmap(idct->pipe, buf_transfer);
    idct->pipe->transfer_destroy(idct->pipe, buf_transfer);
-- 
cgit v1.2.3


From 5a8078486a013152d150a4524ebfab929eefe6c4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 23 Nov 2010 22:43:29 +0100
Subject: [g3dvl] remove invalid use of assert

---
 src/gallium/auxiliary/vl/vl_idct.c               |  8 +++---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 36 +++++++++++++++---------
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index d6f065750f0..1cc4c470ae5 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -338,10 +338,10 @@ xfer_buffers_unmap(struct vl_idct *idct)
 static bool
 init_shaders(struct vl_idct *idct)
 {
-   assert(idct->vs = create_vert_shader(idct));
-   assert(idct->transpose_fs = create_transpose_frag_shader(idct));
-   assert(idct->matrix_fs = create_matrix_frag_shader(idct));
-   assert(idct->eb_fs = create_empty_block_frag_shader(idct));
+   idct->vs = create_vert_shader(idct);
+   idct->transpose_fs = create_transpose_frag_shader(idct);
+   idct->matrix_fs = create_matrix_frag_shader(idct);
+   idct->eb_fs = create_empty_block_frag_shader(idct);
 
    return 
       idct->vs != NULL &&
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 32728413700..8099929b8b0 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -599,20 +599,30 @@ init_shaders(struct vl_mpeg12_mc_renderer *r)
 {
    assert(r);
 
-   assert(r->i_vs = create_vert_shader(r, 0, 0));
-   assert(r->i_fs = create_intra_frag_shader(r));
+   r->i_vs = create_vert_shader(r, 0, 0);
+   r->i_fs = create_intra_frag_shader(r);
    
-   assert(r->p_vs[0] = create_vert_shader(r, 1, 1));
-   assert(r->p_vs[1] = create_vert_shader(r, 1, 2));
-   assert(r->p_fs[0] = create_frame_pred_frag_shader(r));
-   assert(r->p_fs[1] = create_field_pred_frag_shader(r));
-
-   assert(r->b_vs[0] = create_vert_shader(r, 2, 1));
-   assert(r->b_vs[1] = create_vert_shader(r, 2, 2));
-   assert(r->b_fs[0] = create_frame_bi_pred_frag_shader(r));
-   assert(r->b_fs[1] = create_field_bi_pred_frag_shader(r));
-
-   return true;
+   r->p_vs[0] = create_vert_shader(r, 1, 1);
+   r->p_vs[1] = create_vert_shader(r, 1, 2);
+   r->p_fs[0] = create_frame_pred_frag_shader(r);
+   r->p_fs[1] = create_field_pred_frag_shader(r);
+
+   r->b_vs[0] = create_vert_shader(r, 2, 1);
+   r->b_vs[1] = create_vert_shader(r, 2, 2);
+   r->b_fs[0] = create_frame_bi_pred_frag_shader(r);
+   r->b_fs[1] = create_field_bi_pred_frag_shader(r);
+
+   return
+      r->i_vs != NULL &&
+      r->i_fs != NULL &&
+      r->p_vs[0] != NULL &&
+      r->p_vs[1] != NULL &&
+      r->p_fs[0] != NULL &&
+      r->p_fs[1] != NULL &&
+      r->b_vs[0] != NULL &&
+      r->b_vs[1] != NULL &&
+      r->b_fs[0] != NULL &&
+      r->b_fs[1] != NULL;
 }
 
 static void
-- 
cgit v1.2.3


From ed49905944243863913bc2598f734ca038c85b94 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 24 Nov 2010 19:40:47 +0100
Subject: [g3dvl] spread scaling between idct stages

---
 src/gallium/auxiliary/vl/vl_idct.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 1cc4c470ae5..87cef065e22 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -41,6 +41,9 @@
 
 #define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
+#define STAGE1_SCALE 4.0f
+#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE)
+
 struct vertex_shader_consts
 {
    struct vertex4f norm;
@@ -144,7 +147,7 @@ static void
 matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
            struct ureg_src tc[2], struct ureg_src sampler[2],
            struct ureg_src start[2], struct ureg_src step[2],
-           float scale[2])
+           float scale)
 {
    struct ureg_dst t_tc[2], m[2][2], tmp[2];
    unsigned i, j;
@@ -179,17 +182,12 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
          ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), ureg_src(t_tc[0]), step[0]);
          ureg_ADD(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_src(t_tc[1]), step[1]);
       }
-
-      if(scale[0] != 1.0f)
-         ureg_MUL(shader, m[i][0], ureg_src(m[i][0]), ureg_scalar(ureg_imm1f(shader, scale[0]), TGSI_SWIZZLE_X));
-
-      if(scale[1] != 1.0f)
-         ureg_MUL(shader, m[i][1], ureg_src(m[i][1]), ureg_scalar(ureg_imm1f(shader, scale[1]), TGSI_SWIZZLE_X));
    }
 
    ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(m[0][0]), ureg_src(m[0][1]));
    ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(m[1][0]), ureg_src(m[1][1]));
-   ureg_ADD(shader, ureg_writemask(dst, TGSI_WRITEMASK_X), ureg_src(tmp[0]), ureg_src(tmp[1]));
+   ureg_ADD(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(tmp[0]), ureg_src(tmp[1]));
+   ureg_MUL(shader, dst, ureg_src(tmp[0]), ureg_imm1f(shader, scale));
 
    for(i = 0; i < 2; ++i) {
       ureg_release_temporary(shader, t_tc[i]);
@@ -206,7 +204,6 @@ create_transpose_frag_shader(struct vl_idct *idct)
    struct ureg_src tc[2], sampler[2];
    struct ureg_src start[2], step[2];
    struct ureg_dst fragment;
-   float scale[2];
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -224,12 +221,9 @@ create_transpose_frag_shader(struct vl_idct *idct)
    sampler[0] = ureg_DECL_sampler(shader, 0);
    sampler[1] = ureg_DECL_sampler(shader, 2);
 
-   scale[0] = 1.0f;
-   scale[1] = SCALE_FACTOR_16_TO_9;
-
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   matrix_mul(shader, fragment, tc, sampler, start, step, scale);
+   matrix_mul(shader, fragment, tc, sampler, start, step, STAGE1_SCALE);
 
    ureg_END(shader);
 
@@ -243,7 +237,6 @@ create_matrix_frag_shader(struct vl_idct *idct)
    struct ureg_src tc[2], sampler[2];
    struct ureg_src start[2], step[2];
    struct ureg_dst fragment;
-   float scale[2];
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -261,12 +254,9 @@ create_matrix_frag_shader(struct vl_idct *idct)
    sampler[0] = ureg_DECL_sampler(shader, 3);
    sampler[1] = ureg_DECL_sampler(shader, 1);
 
-   scale[0] = 1.0f;
-   scale[1] = 1.0f;
-
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   matrix_mul(shader, fragment, tc, sampler, start, step, scale);
+   matrix_mul(shader, fragment, tc, sampler, start, step, STAGE2_SCALE);
 
    ureg_END(shader);
 
-- 
cgit v1.2.3


From 5391ef86063d382ab011e887bdd0350f394f2352 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 24 Nov 2010 19:46:51 +0100
Subject: [g3dvl] remove flushing between stages

---
 src/gallium/auxiliary/vl/vl_idct.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 87cef065e22..a9bee3bfac0 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -708,8 +708,6 @@ vl_idct_flush(struct vl_idct *idct)
 
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
 
-      idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
-
       /* second stage */
       idct->fb_state.cbufs[0] = idct->surfaces.destination;
       idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
@@ -744,8 +742,6 @@ vl_idct_flush(struct vl_idct *idct)
          idct->num_empty_blocks * 4);
    }
 
-   idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
-
    idct->num_blocks = 0;
    idct->num_empty_blocks = 0;
    xfer_buffers_map(idct);
-- 
cgit v1.2.3


From 2c9db2484b7c1cec7a3a629f70a5aa840e16268e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 24 Nov 2010 20:00:52 +0100
Subject: [g3dvl] no need for all samplers at all stages

---
 src/gallium/auxiliary/vl/vl_idct.c | 18 +++++++-----------
 src/gallium/auxiliary/vl/vl_idct.h | 15 +++++++++------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index a9bee3bfac0..6e49600a286 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -219,7 +219,7 @@ create_transpose_frag_shader(struct vl_idct *idct)
    step[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
 
    sampler[0] = ureg_DECL_sampler(shader, 0);
-   sampler[1] = ureg_DECL_sampler(shader, 2);
+   sampler[1] = ureg_DECL_sampler(shader, 1);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -251,8 +251,8 @@ create_matrix_frag_shader(struct vl_idct *idct)
    step[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
    step[1] = ureg_imm1f(shader, 1.0f / BLOCK_WIDTH);
 
-   sampler[0] = ureg_DECL_sampler(shader, 3);
-   sampler[1] = ureg_DECL_sampler(shader, 1);
+   sampler[0] = ureg_DECL_sampler(shader, 1);
+   sampler[1] = ureg_DECL_sampler(shader, 0);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -295,11 +295,7 @@ xfer_buffers_map(struct vl_idct *idct)
 
    idct->tex_transfer = idct->pipe->get_transfer
    (
-#if 1
       idct->pipe, idct->textures.individual.source,
-#else
-      idct->pipe, idct->destination,
-#endif
       u_subresource(0, 0),
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
@@ -701,8 +697,8 @@ vl_idct_flush(struct vl_idct *idct)
 
       idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-      idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
-      idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
+      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
+      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
       idct->pipe->bind_vs_state(idct->pipe, idct->vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
 
@@ -715,8 +711,8 @@ vl_idct_flush(struct vl_idct *idct)
 
       idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-      idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
-      idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
+      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
+      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
       idct->pipe->bind_vs_state(idct->pipe, idct->vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
 
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 84ba5288aff..01df3f9103f 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -47,18 +47,20 @@ struct vl_idct
    union
    {
       void *all[4];
+      void *stage[2][2];
       struct {
-         void *transpose, *matrix;
-         void *source, *intermediate;
+         void *transpose, *source;
+         void *matrix, *intermediate;
       } individual;
    } samplers;
 
    union
    {
       struct pipe_sampler_view *all[4];
+      struct pipe_sampler_view *stage[2][2];
       struct {
-         struct pipe_sampler_view *transpose, *matrix;
-         struct pipe_sampler_view *source, *intermediate;
+         struct pipe_sampler_view *transpose, *source;
+         struct pipe_sampler_view *matrix, *intermediate;
       } individual;
    } sampler_views;
 
@@ -68,9 +70,10 @@ struct vl_idct
    union
    {
       struct pipe_resource *all[4];
+      struct pipe_resource *stage[2][2];
       struct {
-         struct pipe_resource *transpose, *matrix;
-         struct pipe_resource *source, *intermediate;
+         struct pipe_resource *transpose, *source;
+         struct pipe_resource *matrix, *intermediate;
       } individual;
    } textures;
 
-- 
cgit v1.2.3


From a51b0daa59c03cae8cc67baa48c11ff63155cbcb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 24 Nov 2010 21:36:54 +0100
Subject: r600g: disable R32 float also in r600_translate_colorformat

---
 src/gallium/drivers/r600/r600_state_inlines.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index a78834ecf43..7fa56fe5a2f 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -433,8 +433,8 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
 	case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
 		return V_0280A0_COLOR_24_8;
 
-	case PIPE_FORMAT_R32_FLOAT:
-		return V_0280A0_COLOR_32_FLOAT;
+	//case PIPE_FORMAT_R32_FLOAT:
+	//	return V_0280A0_COLOR_32_FLOAT;
 
 	case PIPE_FORMAT_R16G16_FLOAT:
 		return V_0280A0_COLOR_16_16_FLOAT;
-- 
cgit v1.2.3


From 431e72984b12e6ef0a1668814ec54b14255f98a1 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 24 Nov 2010 21:40:50 +0100
Subject: r600g: reenable texture uploads, but keep R16_SNORM disabled

---
 src/gallium/drivers/r600/r600_texture.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index b7600e90eb5..ec7bd0d75cd 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -531,6 +531,9 @@ static boolean permit_hardware_blit(struct pipe_screen *screen,
                                          PIPE_BIND_SAMPLER_VIEW, 0))
                 return FALSE;
 
+	if (res->format == PIPE_FORMAT_R16_SNORM)
+                return FALSE;
+
         return TRUE;
 }
 
@@ -571,8 +574,8 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
                        PIPE_TRANSFER_UNSYNCHRONIZED)))
                 use_staging_texture = TRUE;
 
-        /*if (!permit_hardware_blit(ctx->screen, texture) ||
-            (texture->flags & R600_RESOURCE_FLAG_TRANSFER))*/
+        if (!permit_hardware_blit(ctx->screen, texture) ||
+            (texture->flags & R600_RESOURCE_FLAG_TRANSFER))
                 use_staging_texture = FALSE;
 
 	trans = CALLOC_STRUCT(r600_transfer);
-- 
cgit v1.2.3


From de623b96abea4352259c8079ddc1a16fe5d4b22f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 24 Nov 2010 23:44:30 +0100
Subject: r600g: disable not working formats

---
 src/gallium/drivers/r600/r600_state_inlines.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 7fa56fe5a2f..981f5481cdf 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -359,7 +359,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
 		//		return FMT_16_16_16_16_FLOAT;
 
 		/* 128-bit buffers. */
-	case PIPE_FORMAT_R32G32B32A32_FLOAT:
+	//case PIPE_FORMAT_R32G32B32A32_FLOAT:
 		//		return FMT_32_32_32_32_FLOAT;
 		return 0;
 	default:
@@ -465,10 +465,10 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
 		return V_0280A0_COLOR_32_32;
 
 		/* 128-bit buffers. */
-	case PIPE_FORMAT_R32G32B32_FLOAT:
-	  	return V_0280A0_COLOR_32_32_32_FLOAT;
-	case PIPE_FORMAT_R32G32B32A32_FLOAT:
-		return V_0280A0_COLOR_32_32_32_32_FLOAT;
+	//case PIPE_FORMAT_R32G32B32_FLOAT:
+	//  	return V_0280A0_COLOR_32_32_32_FLOAT;
+	//case PIPE_FORMAT_R32G32B32A32_FLOAT:
+	//	return V_0280A0_COLOR_32_32_32_32_FLOAT;
 
 		/* YUV buffers. */
 	case PIPE_FORMAT_UYVY:
-- 
cgit v1.2.3


From c9e10c666adc64f6c5dfb04422560508f115aa54 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 24 Nov 2010 23:54:21 +0100
Subject: [g3dvl] use four elements in matrix texture fetch

---
 src/gallium/auxiliary/vl/vl_idct.c | 50 +++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 6e49600a286..497e67e94d3 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -147,10 +147,10 @@ static void
 matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
            struct ureg_src tc[2], struct ureg_src sampler[2],
            struct ureg_src start[2], struct ureg_src step[2],
-           float scale)
+           bool fetch4[2], float scale)
 {
    struct ureg_dst t_tc[2], m[2][2], tmp[2];
-   unsigned i, j;
+   unsigned side, i, j;
 
    for(i = 0; i < 2; ++i) {
       t_tc[i] = ureg_DECL_temporary(shader);
@@ -170,17 +170,19 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
    ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), tc[1]);
    ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), start[1]);
 
-   for(i = 0; i < 2; ++i) {
-      for(j = 0; j < 4; ++j) {
-         /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
-         ureg_TEX(shader, tmp[0], TGSI_TEXTURE_2D, ureg_src(t_tc[0]), sampler[0]);
-         ureg_MOV(shader, ureg_writemask(m[i][0], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[0]), TGSI_SWIZZLE_X));
+   for(side = 0; side < 2; ++side) {
+      for(i = 0; i < 2; ++i) {
+         if(fetch4[side]) {
+            ureg_TEX(shader, m[i][side], TGSI_TEXTURE_2D, ureg_src(t_tc[side]), sampler[side]);
+            ureg_ADD(shader, ureg_writemask(t_tc[side], TGSI_WRITEMASK_X), ureg_src(t_tc[side]), step[side]);
 
-         ureg_TEX(shader, tmp[1], TGSI_TEXTURE_2D, ureg_src(t_tc[1]), sampler[1]);
-         ureg_MOV(shader, ureg_writemask(m[i][1], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[1]), TGSI_SWIZZLE_X));
+         } else for(j = 0; j < 4; ++j) {
+            /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
+            ureg_TEX(shader, tmp[side], TGSI_TEXTURE_2D, ureg_src(t_tc[side]), sampler[side]);
+            ureg_MOV(shader, ureg_writemask(m[i][side], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[side]), TGSI_SWIZZLE_X));
 
-         ureg_ADD(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), ureg_src(t_tc[0]), step[0]);
-         ureg_ADD(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_src(t_tc[1]), step[1]);
+            ureg_ADD(shader, ureg_writemask(t_tc[side], TGSI_WRITEMASK_X << side), ureg_src(t_tc[side]), step[side]);
+         }
       }
    }
 
@@ -204,6 +206,7 @@ create_transpose_frag_shader(struct vl_idct *idct)
    struct ureg_src tc[2], sampler[2];
    struct ureg_src start[2], step[2];
    struct ureg_dst fragment;
+   bool fetch4[2];
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -215,15 +218,18 @@ create_transpose_frag_shader(struct vl_idct *idct)
    start[0] = ureg_imm1f(shader, 0.0f);
    start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
-   step[0] = ureg_imm1f(shader, 1.0f / BLOCK_HEIGHT);
+   step[0] = ureg_imm1f(shader, 4.0f / BLOCK_HEIGHT);
    step[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
 
    sampler[0] = ureg_DECL_sampler(shader, 0);
    sampler[1] = ureg_DECL_sampler(shader, 1);
 
+   fetch4[0] = true;
+   fetch4[1] = false;
+
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   matrix_mul(shader, fragment, tc, sampler, start, step, STAGE1_SCALE);
+   matrix_mul(shader, fragment, tc, sampler, start, step, fetch4, STAGE1_SCALE);
 
    ureg_END(shader);
 
@@ -237,6 +243,7 @@ create_matrix_frag_shader(struct vl_idct *idct)
    struct ureg_src tc[2], sampler[2];
    struct ureg_src start[2], step[2];
    struct ureg_dst fragment;
+   bool fetch4[2];
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -254,9 +261,12 @@ create_matrix_frag_shader(struct vl_idct *idct)
    sampler[0] = ureg_DECL_sampler(shader, 1);
    sampler[1] = ureg_DECL_sampler(shader, 0);
 
+   fetch4[0] = false;
+   fetch4[1] = false;
+
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   matrix_mul(shader, fragment, tc, sampler, start, step, STAGE2_SCALE);
+   matrix_mul(shader, fragment, tc, sampler, start, step, fetch4, STAGE2_SCALE);
 
    ureg_END(shader);
 
@@ -360,18 +370,20 @@ init_buffers(struct vl_idct *idct)
 
    memset(&template, 0, sizeof(struct pipe_resource));
    template.target = PIPE_TEXTURE_2D;
-   template.format = PIPE_FORMAT_R32_FLOAT;
+   template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
    template.last_level = 0;
-   template.width0 = 8;
+   template.width0 = 2;
    template.height0 = 8;
    template.depth0 = 1;
    template.usage = PIPE_USAGE_IMMUTABLE;
    template.bind = PIPE_BIND_SAMPLER_VIEW;
    template.flags = 0;
 
-   idct->textures.individual.matrix = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
    idct->textures.individual.transpose = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
+   template.width0 = 8;
+   idct->textures.individual.matrix = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+
    template.format = idct->destination->format;
    template.width0 = idct->destination->width0;
    template.height0 = idct->destination->height0;
@@ -505,7 +517,7 @@ init_constants(struct vl_idct *idct)
    f = idct->pipe->transfer_map(idct->pipe, buf_transfer);
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
-         f[i * pitch + j] = const_matrix[j][i]; // transpose
+         f[i * pitch * 4 + j] = const_matrix[j][i]; // transpose
 
    idct->pipe->transfer_unmap(idct->pipe, buf_transfer);
    idct->pipe->transfer_destroy(idct->pipe, buf_transfer);
@@ -523,7 +535,7 @@ init_constants(struct vl_idct *idct)
    f = idct->pipe->transfer_map(idct->pipe, buf_transfer);
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
-         f[i * pitch + j] = const_matrix[i][j];
+         f[i * pitch * 4 + j * 4] = const_matrix[i][j];
 
    idct->pipe->transfer_unmap(idct->pipe, buf_transfer);
    idct->pipe->transfer_destroy(idct->pipe, buf_transfer);
-- 
cgit v1.2.3


From ed8b767a8e09cff4d98a44cdc07b08f1b322c4d3 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 25 Nov 2010 19:37:12 +0100
Subject: [g3dvl] also use four elements on right side multiplication

---
 src/gallium/auxiliary/vl/vl_idct.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 497e67e94d3..c1550cb365a 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -167,14 +167,19 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
    ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), start[0]);
    ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_Y), tc[0]);
 
-   ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), tc[1]);
-   ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), start[1]);
+   if(fetch4[1]) {
+      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), ureg_scalar(start[1], TGSI_SWIZZLE_Y));
+      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_scalar(tc[1], TGSI_SWIZZLE_X));
+   } else {
+      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), tc[1]);
+      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), start[1]);
+   }
 
    for(side = 0; side < 2; ++side) {
       for(i = 0; i < 2; ++i) {
          if(fetch4[side]) {
             ureg_TEX(shader, m[i][side], TGSI_TEXTURE_2D, ureg_src(t_tc[side]), sampler[side]);
-            ureg_ADD(shader, ureg_writemask(t_tc[side], TGSI_WRITEMASK_X), ureg_src(t_tc[side]), step[side]);
+            ureg_MOV(shader, ureg_writemask(t_tc[side], TGSI_WRITEMASK_X), step[side]);
 
          } else for(j = 0; j < 4; ++j) {
             /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
@@ -256,13 +261,13 @@ create_matrix_frag_shader(struct vl_idct *idct)
    start[1] = ureg_imm1f(shader, 0.0f);
 
    step[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
-   step[1] = ureg_imm1f(shader, 1.0f / BLOCK_WIDTH);
+   step[1] = ureg_imm1f(shader, 4.0f / BLOCK_WIDTH);
 
    sampler[0] = ureg_DECL_sampler(shader, 1);
    sampler[1] = ureg_DECL_sampler(shader, 0);
 
    fetch4[0] = false;
-   fetch4[1] = false;
+   fetch4[1] = true;
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -380,8 +385,6 @@ init_buffers(struct vl_idct *idct)
    template.flags = 0;
 
    idct->textures.individual.transpose = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
-
-   template.width0 = 8;
    idct->textures.individual.matrix = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
    template.format = idct->destination->format;
@@ -535,7 +538,7 @@ init_constants(struct vl_idct *idct)
    f = idct->pipe->transfer_map(idct->pipe, buf_transfer);
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
-         f[i * pitch * 4 + j * 4] = const_matrix[i][j];
+         f[i * pitch * 4 + j] = const_matrix[j][i]; // transpose
 
    idct->pipe->transfer_unmap(idct->pipe, buf_transfer);
    idct->pipe->transfer_destroy(idct->pipe, buf_transfer);
-- 
cgit v1.2.3


From 9cff90534389c2aad9b58ff04b1a5d624e3d0bdb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 25 Nov 2010 21:23:48 +0100
Subject: [g3dvl] add dump option to xvmc

---
 src/gallium/state_trackers/xorg/xvmc/surface.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 0a236e26429..209dffd2c58 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 #include <assert.h>
+#include <stdio.h>
 #include <X11/Xlibint.h>
 #include <vl_winsys.h>
 #include <pipe/p_video_context.h>
@@ -373,6 +374,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
                       short destx, short desty, unsigned short destw, unsigned short desth,
                       int flags)
 {
+   static int dump_window = -1;
+
    struct pipe_video_context *vpipe;
    XvMCSurfacePrivate *surface_priv;
    XvMCContextPrivate *context_priv;
@@ -451,6 +454,17 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 
    pipe_surface_reference(&drawable_surface, NULL);
 
+   if(dump_window == -1) {
+      dump_window = debug_get_num_option("XVMC_DUMP", 0);
+   }
+
+   if(dump_window) {
+      static unsigned int framenum = 0;
+      char cmd[256];
+      sprintf(cmd, "xwd -id %d -out xvmc_frame_%08d.xwd", (int)drawable, ++framenum);
+      system(cmd);
+   }
+
    XVMC_MSG(XVMC_TRACE, "[XvMC] Pushed surface %p to front buffer.\n", surface);
 
    return Success;
-- 
cgit v1.2.3


From 3dd7bf7d39781f3ef4c0b53732945674c9924cdf Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 25 Nov 2010 22:10:21 +0100
Subject: [g3dvl] no need to keep the idct matrix multiple times

---
 src/gallium/auxiliary/vl/vl_idct.c               | 104 ++++++++++++-----------
 src/gallium/auxiliary/vl/vl_idct.h               |   4 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  13 ++-
 3 files changed, 67 insertions(+), 54 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index c1550cb365a..5ee4e674d96 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -384,9 +384,6 @@ init_buffers(struct vl_idct *idct)
    template.bind = PIPE_BIND_SAMPLER_VIEW;
    template.flags = 0;
 
-   idct->textures.individual.transpose = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
-   idct->textures.individual.matrix = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
-
    template.format = idct->destination->format;
    template.width0 = idct->destination->width0;
    template.height0 = idct->destination->height0;
@@ -483,17 +480,8 @@ init_constants(struct vl_idct *idct)
    struct pipe_transfer *buf_transfer;
    struct vertex_shader_consts *vs_consts;
    struct vertex2f *v;
-   float *f;
-
-   struct pipe_box rect =
-   {
-      0, 0, 0,
-      BLOCK_WIDTH,
-      BLOCK_HEIGHT,
-      1
-   };
 
-   unsigned i, j, pitch;
+   unsigned i;
 
    /* quad vectors */
    v = pipe_buffer_map
@@ -507,42 +495,6 @@ init_constants(struct vl_idct *idct)
      memcpy(v + i * 4, &const_quad, sizeof(const_quad));
    pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
 
-   /* transposed matrix */
-   buf_transfer = idct->pipe->get_transfer
-   (
-      idct->pipe, idct->textures.individual.transpose,
-      u_subresource(0, 0),
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &rect
-   );
-   pitch = buf_transfer->stride / util_format_get_blocksize(buf_transfer->resource->format);
-
-   f = idct->pipe->transfer_map(idct->pipe, buf_transfer);
-   for(i = 0; i < BLOCK_HEIGHT; ++i)
-      for(j = 0; j < BLOCK_WIDTH; ++j)
-         f[i * pitch * 4 + j] = const_matrix[j][i]; // transpose
-
-   idct->pipe->transfer_unmap(idct->pipe, buf_transfer);
-   idct->pipe->transfer_destroy(idct->pipe, buf_transfer);
-
-   /* matrix */
-   buf_transfer = idct->pipe->get_transfer
-   (
-      idct->pipe, idct->textures.individual.matrix,
-      u_subresource(0, 0),
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &rect
-   );
-   pitch = buf_transfer->stride / util_format_get_blocksize(buf_transfer->resource->format);
-
-   f = idct->pipe->transfer_map(idct->pipe, buf_transfer);
-   for(i = 0; i < BLOCK_HEIGHT; ++i)
-      for(j = 0; j < BLOCK_WIDTH; ++j)
-         f[i * pitch * 4 + j] = const_matrix[j][i]; // transpose
-
-   idct->pipe->transfer_unmap(idct->pipe, buf_transfer);
-   idct->pipe->transfer_destroy(idct->pipe, buf_transfer);
-
    /* normalisation constants */
    vs_consts = pipe_buffer_map
    (
@@ -610,12 +562,64 @@ cleanup_state(struct vl_idct *idct)
       idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
 }
 
+struct pipe_resource *
+vl_idct_upload_matrix(struct pipe_context *pipe)
+{
+   struct pipe_resource template, *matrix;
+   struct pipe_transfer *buf_transfer;
+   unsigned i, j, pitch;
+   float *f;
+
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      BLOCK_WIDTH,
+      BLOCK_HEIGHT,
+      1
+   };
+
+   memset(&template, 0, sizeof(struct pipe_resource));
+   template.target = PIPE_TEXTURE_2D;
+   template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   template.last_level = 0;
+   template.width0 = 2;
+   template.height0 = 8;
+   template.depth0 = 1;
+   template.usage = PIPE_USAGE_IMMUTABLE;
+   template.bind = PIPE_BIND_SAMPLER_VIEW;
+   template.flags = 0;
+
+   matrix = pipe->screen->resource_create(pipe->screen, &template);
+
+   /* matrix */
+   buf_transfer = pipe->get_transfer
+   (
+      pipe, matrix,
+      u_subresource(0, 0),
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+   pitch = buf_transfer->stride / util_format_get_blocksize(buf_transfer->resource->format);
+
+   f = pipe->transfer_map(pipe, buf_transfer);
+   for(i = 0; i < BLOCK_HEIGHT; ++i)
+      for(j = 0; j < BLOCK_WIDTH; ++j)
+         f[i * pitch * 4 + j] = const_matrix[j][i]; // transpose
+
+   pipe->transfer_unmap(pipe, buf_transfer);
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+   return matrix;
+}
+
 bool
-vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst)
+vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix)
 {
    assert(idct && pipe && dst);
 
    idct->pipe = pipe;
+   pipe_resource_reference(&idct->textures.individual.matrix, matrix);
+   pipe_resource_reference(&idct->textures.individual.transpose, matrix);
    pipe_resource_reference(&idct->destination, dst);
 
    init_state(idct);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 01df3f9103f..ed64a308f3b 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -97,7 +97,9 @@ struct vl_idct
    } surfaces;
 };
 
-bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst);
+struct pipe_resource *vl_idct_upload_matrix(struct pipe_context *pipe);
+
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix);
 
 void vl_idct_cleanup(struct vl_idct *idct);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 8099929b8b0..b756f2db611 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1295,8 +1295,11 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
                            bool pot_buffers)
 {
+   struct pipe_resource *idct_matrix;
+
    assert(renderer);
    assert(pipe);
+
    /* TODO: Implement other policies */
    assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
    /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
@@ -1332,13 +1335,16 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->future = NULL;
    renderer->num_macroblocks = 0;
 
-   if(!vl_idct_init(&renderer->idct_y, pipe, renderer->textures.individual.y))
+   if(!(idct_matrix = vl_idct_upload_matrix(pipe)))
+      goto error_idct_matrix;
+
+   if(!vl_idct_init(&renderer->idct_y, pipe, renderer->textures.individual.y, idct_matrix))
       goto error_idct_y;
 
-   if(!vl_idct_init(&renderer->idct_cr, pipe, renderer->textures.individual.cr))
+   if(!vl_idct_init(&renderer->idct_cr, pipe, renderer->textures.individual.cr, idct_matrix))
       goto error_idct_cr;
 
-   if(!vl_idct_init(&renderer->idct_cb, pipe, renderer->textures.individual.cb))
+   if(!vl_idct_init(&renderer->idct_cb, pipe, renderer->textures.individual.cb, idct_matrix))
       goto error_idct_cb;
 
    return true;
@@ -1350,6 +1356,7 @@ error_idct_cr:
    vl_idct_cleanup(&renderer->idct_y);
 
 error_idct_y:
+error_idct_matrix:
    cleanup_buffers(renderer);
 
 error_buffers:
-- 
cgit v1.2.3


From 7408a6ab89e0bc87209b50334604fae93277fdc6 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 26 Nov 2010 19:14:55 +0100
Subject: [g3dvl] use inline constants instead of vs_const for idct

---
 src/gallium/auxiliary/vl/vl_idct.c               | 69 ++++++------------------
 src/gallium/auxiliary/vl/vl_idct.h               |  1 -
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  3 +-
 3 files changed, 18 insertions(+), 55 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 5ee4e674d96..d91963ada02 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -62,8 +62,7 @@ enum VS_OUTPUT
    VS_O_VPOS,
    VS_O_BLOCK,
    VS_O_TEX,
-   VS_O_START,
-   VS_O_STEP
+   VS_O_START
 };
 
 static const float const_matrix[8][8] = {
@@ -86,19 +85,19 @@ static void *
 create_vert_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
-   struct ureg_src norm, bs;
+   struct ureg_src scale;
    struct ureg_src vrect, vpos;
-   struct ureg_dst scale, t_vpos;
-   struct ureg_dst o_vpos, o_block, o_tex, o_start, o_step;
+   struct ureg_dst t_vpos;
+   struct ureg_dst o_vpos, o_block, o_tex, o_start;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
       return NULL;
 
-   norm = ureg_DECL_constant(shader, 0);
-   bs = ureg_imm2f(shader, BLOCK_WIDTH, BLOCK_HEIGHT);
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / idct->destination->width0, 
+      (float)BLOCK_HEIGHT / idct->destination->height0);
 
-   scale = ureg_DECL_temporary(shader);
    t_vpos = ureg_DECL_temporary(shader);
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
@@ -108,11 +107,8 @@ create_vert_shader(struct vl_idct *idct)
    o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
    o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
    o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
-   o_step = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP);
 
    /*
-    * scale = norm * mbs;
-    *
     * t_vpos = vpos + vrect
     * o_vpos.xy = t_vpos * scale
     * o_vpos.zw = vpos
@@ -120,23 +116,18 @@ create_vert_shader(struct vl_idct *idct)
     * o_block = vrect
     * o_tex = t_pos
     * o_start = vpos * scale
-    * o_step = norm
     *
     */
-   ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, bs);
-
    ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
-   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), ureg_src(scale));
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
-   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
-   ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, ureg_src(scale));
-   ureg_MOV(shader, ureg_writemask(o_step, TGSI_WRITEMASK_XY), norm);
+   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
 
    ureg_release_temporary(shader, t_vpos);
-   ureg_release_temporary(shader, scale);
 
    ureg_END(shader);
 
@@ -164,15 +155,15 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
     * tmp[0..1] = dot4(m[0..1][0], m[0..1][1])
     * fragment = tmp[0] + tmp[1]
     */
-   ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), start[0]);
-   ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_Y), tc[0]);
+   ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), ureg_scalar(start[0], TGSI_SWIZZLE_X));
+   ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_Y), ureg_scalar(tc[0], TGSI_SWIZZLE_Y));
 
    if(fetch4[1]) {
       ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), ureg_scalar(start[1], TGSI_SWIZZLE_Y));
       ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_scalar(tc[1], TGSI_SWIZZLE_X));
    } else {
-      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), tc[1]);
-      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), start[1]);
+      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), ureg_scalar(tc[1], TGSI_SWIZZLE_X));
+      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_scalar(start[1], TGSI_SWIZZLE_Y));
    }
 
    for(side = 0; side < 2; ++side) {
@@ -224,7 +215,7 @@ create_transpose_frag_shader(struct vl_idct *idct)
    start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
    step[0] = ureg_imm1f(shader, 4.0f / BLOCK_HEIGHT);
-   step[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
+   step[1] = ureg_imm1f(shader, 1.0f / idct->destination->height0);
 
    sampler[0] = ureg_DECL_sampler(shader, 0);
    sampler[1] = ureg_DECL_sampler(shader, 1);
@@ -260,7 +251,7 @@ create_matrix_frag_shader(struct vl_idct *idct)
    start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
    start[1] = ureg_imm1f(shader, 0.0f);
 
-   step[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_STEP, TGSI_INTERPOLATE_CONSTANT);
+   step[0] = ureg_imm1f(shader, 1.0f / idct->destination->width0);
    step[1] = ureg_imm1f(shader, 4.0f / BLOCK_WIDTH);
 
    sampler[0] = ureg_DECL_sampler(shader, 1);
@@ -442,16 +433,6 @@ init_buffers(struct vl_idct *idct)
 
    idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
 
-   idct->vs_const_buf = pipe_buffer_create
-   (
-      idct->pipe->screen,
-      PIPE_BIND_CONSTANT_BUFFER,
-      sizeof(struct vertex_shader_consts)
-   );
-
-   if(idct->vs_const_buf == NULL)
-      return false;
-
    return true;
 }
 
@@ -462,8 +443,6 @@ cleanup_buffers(struct vl_idct *idct)
 
    assert(idct);
 
-   pipe_resource_reference(&idct->vs_const_buf, NULL);
-
    for (i = 0; i < 4; ++i) {
       pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL);
       pipe_resource_reference(&idct->textures.all[i], NULL);
@@ -478,7 +457,6 @@ static void
 init_constants(struct vl_idct *idct)
 {
    struct pipe_transfer *buf_transfer;
-   struct vertex_shader_consts *vs_consts;
    struct vertex2f *v;
 
    unsigned i;
@@ -494,19 +472,6 @@ init_constants(struct vl_idct *idct)
    for ( i = 0; i < idct->max_blocks; ++i)
      memcpy(v + i * 4, &const_quad, sizeof(const_quad));
    pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
-
-   /* normalisation constants */
-   vs_consts = pipe_buffer_map
-   (
-      idct->pipe, idct->vs_const_buf,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer
-   );
-
-   vs_consts->norm.x = 1.0f / idct->destination->width0;
-   vs_consts->norm.y = 1.0f / idct->destination->height0;
-
-   pipe_buffer_unmap(idct->pipe, idct->vs_const_buf, buf_transfer);
 }
 
 static void
@@ -705,8 +670,6 @@ vl_idct_flush(struct vl_idct *idct)
 {
    xfer_buffers_unmap(idct);
 
-   idct->pipe->set_constant_buffer(idct->pipe, PIPE_SHADER_VERTEX, 0, idct->vs_const_buf);
-
    if(idct->num_blocks > 0) {
 
       /* first stage */
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index ed64a308f3b..da9bfef6fe5 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -37,7 +37,6 @@ struct vl_idct
    unsigned max_blocks;
 
    struct pipe_viewport_state viewport;
-   struct pipe_resource *vs_const_buf;
    struct pipe_framebuffer_state fb_state;
 
    struct pipe_resource *destination;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index b756f2db611..7bb50596de6 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1086,7 +1086,6 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    gen_macroblock_stream(r, num_macroblocks);
 
-   r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, r->vs_const_buf);
    r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
    r->pipe->set_viewport_state(r->pipe, &r->viewport);
 
@@ -1221,6 +1220,8 @@ update_render_target(struct vl_mpeg12_mc_renderer *r)
    pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
 
    r->fb_state.cbufs[0] = r->surface;
+
+   r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, r->vs_const_buf);
 }
 
 static void
-- 
cgit v1.2.3


From cfe489b89723117e56674c2be7761c201f8d78ff Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 26 Nov 2010 20:25:00 +0100
Subject: [g3dvl] split matrix mul into separate functions

---
 src/gallium/auxiliary/vl/vl_idct.c | 178 ++++++++++++++++++++++++-------------
 1 file changed, 114 insertions(+), 64 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index d91963ada02..9efe4326926 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -135,74 +135,114 @@ create_vert_shader(struct vl_idct *idct)
 }
 
 static void
-matrix_mul(struct ureg_program *shader, struct ureg_dst dst,
-           struct ureg_src tc[2], struct ureg_src sampler[2],
-           struct ureg_src start[2], struct ureg_src step[2],
-           bool fetch4[2], float scale)
+fetch_one(struct ureg_program *shader, struct ureg_dst m[2],
+          struct ureg_src tc, struct ureg_src sampler,
+          struct ureg_src start, bool right_side, float size)
 {
-   struct ureg_dst t_tc[2], m[2][2], tmp[2];
-   unsigned side, i, j;
+   struct ureg_dst t_tc, tmp;
+   unsigned i, j;
 
+   t_tc = ureg_DECL_temporary(shader);
+   tmp = ureg_DECL_temporary(shader);
+
+   m[0] = ureg_DECL_temporary(shader);
+   m[1] = ureg_DECL_temporary(shader);
+
+   /*
+    * t_tc.x = right_side ? start.x : tc.x
+    * t_tc.y = right_side ? tc.y : start.y
+    * m[0..1].xyzw = tex(t_tc++, sampler)
+    */
+   if(right_side) {
+      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(tc, TGSI_SWIZZLE_X));
+      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(start, TGSI_SWIZZLE_Y));
+   } else {
+      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_X));
+      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y));
+   }
    for(i = 0; i < 2; ++i) {
-      t_tc[i] = ureg_DECL_temporary(shader);
-      for(j = 0; j < 2; ++j)
-         m[i][j] = ureg_DECL_temporary(shader);
-      tmp[i] = ureg_DECL_temporary(shader);
+      for(j = 0; j < 4; ++j) {
+         /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
+         ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
+         ureg_MOV(shader, ureg_writemask(m[i], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
+
+         if(i != 1 || j != 3) /* skip the last add */
+            ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X << right_side),
+               ureg_src(t_tc), ureg_imm1f(shader, 1.0f / size));
+      }
    }
 
+   ureg_release_temporary(shader, t_tc);
+   ureg_release_temporary(shader, tmp);
+}
+
+static void
+fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
+           struct ureg_src tc, struct ureg_src sampler,
+           struct ureg_src start, bool right_side, float size)
+{
+   struct ureg_dst t_tc;
+
+   t_tc = ureg_DECL_temporary(shader);
+   m[0] = ureg_DECL_temporary(shader);
+   m[1] = ureg_DECL_temporary(shader);
+
    /*
-    * m[0..1][0] = ?
-    * tmp[0..1] = dot4(m[0..1][0], m[0..1][1])
-    * fragment = tmp[0] + tmp[1]
+    * t_tc.x = right_side ? start.x : tc.x
+    * t_tc.y = right_side ? tc.y : start.y
+    * m[0..1] = tex(t_tc++, sampler)
     */
-   ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_X), ureg_scalar(start[0], TGSI_SWIZZLE_X));
-   ureg_MOV(shader, ureg_writemask(t_tc[0], TGSI_WRITEMASK_Y), ureg_scalar(tc[0], TGSI_SWIZZLE_Y));
-
-   if(fetch4[1]) {
-      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), ureg_scalar(start[1], TGSI_SWIZZLE_Y));
-      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_scalar(tc[1], TGSI_SWIZZLE_X));
+   if(right_side) {
+      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_Y));
+      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_X));
    } else {
-      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_X), ureg_scalar(tc[1], TGSI_SWIZZLE_X));
-      ureg_MOV(shader, ureg_writemask(t_tc[1], TGSI_WRITEMASK_Y), ureg_scalar(start[1], TGSI_SWIZZLE_Y));
+      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_X));
+      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y));
    }
 
-   for(side = 0; side < 2; ++side) {
-      for(i = 0; i < 2; ++i) {
-         if(fetch4[side]) {
-            ureg_TEX(shader, m[i][side], TGSI_TEXTURE_2D, ureg_src(t_tc[side]), sampler[side]);
-            ureg_MOV(shader, ureg_writemask(t_tc[side], TGSI_WRITEMASK_X), step[side]);
+   ureg_TEX(shader, m[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
+   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 4.0f / size));
+   ureg_TEX(shader, m[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
 
-         } else for(j = 0; j < 4; ++j) {
-            /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
-            ureg_TEX(shader, tmp[side], TGSI_TEXTURE_2D, ureg_src(t_tc[side]), sampler[side]);
-            ureg_MOV(shader, ureg_writemask(m[i][side], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp[side]), TGSI_SWIZZLE_X));
+   ureg_release_temporary(shader, t_tc);
+}
 
-            ureg_ADD(shader, ureg_writemask(t_tc[side], TGSI_WRITEMASK_X << side), ureg_src(t_tc[side]), step[side]);
-         }
-      }
+static struct ureg_dst
+matrix_mul(struct ureg_program *shader, struct ureg_dst m[2][2])
+{
+   struct ureg_dst dst, tmp[2];
+   unsigned i;
+
+   dst = ureg_DECL_temporary(shader);
+   for(i = 0; i < 2; ++i) {
+      tmp[i] = ureg_DECL_temporary(shader);
    }
 
-   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(m[0][0]), ureg_src(m[0][1]));
-   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(m[1][0]), ureg_src(m[1][1]));
-   ureg_ADD(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(tmp[0]), ureg_src(tmp[1]));
-   ureg_MUL(shader, dst, ureg_src(tmp[0]), ureg_imm1f(shader, scale));
+   /*
+    * tmp[0..1] = dot4(m[0][0..1], m[1][0..1])
+    * dst = tmp[0] + tmp[1]
+    */
+   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(m[0][0]), ureg_src(m[1][0]));
+   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(m[0][1]), ureg_src(m[1][1]));
+   ureg_ADD(shader, ureg_writemask(dst, TGSI_WRITEMASK_X), ureg_src(tmp[0]), ureg_src(tmp[1]));
 
    for(i = 0; i < 2; ++i) {
-      ureg_release_temporary(shader, t_tc[i]);
-      for(j = 0; j < 2; ++j)
-         ureg_release_temporary(shader, m[i][j]);
       ureg_release_temporary(shader, tmp[i]);
    }
+
+   return dst;
 }
 
 static void *
 create_transpose_frag_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
+
    struct ureg_src tc[2], sampler[2];
-   struct ureg_src start[2], step[2];
-   struct ureg_dst fragment;
-   bool fetch4[2];
+   struct ureg_src start[2];
+
+   struct ureg_dst m[2][2];
+   struct ureg_dst tmp, fragment;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -211,21 +251,25 @@ create_transpose_frag_shader(struct vl_idct *idct)
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
 
-   start[0] = ureg_imm1f(shader, 0.0f);
-   start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
-
-   step[0] = ureg_imm1f(shader, 4.0f / BLOCK_HEIGHT);
-   step[1] = ureg_imm1f(shader, 1.0f / idct->destination->height0);
-
    sampler[0] = ureg_DECL_sampler(shader, 0);
    sampler[1] = ureg_DECL_sampler(shader, 1);
 
-   fetch4[0] = true;
-   fetch4[1] = false;
+   start[0] = ureg_imm1f(shader, 0.0f);
+   start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
+
+   fetch_four(shader, m[0], tc[0], sampler[0], start[0], false, BLOCK_WIDTH);
+   fetch_one(shader, m[1], tc[1], sampler[1], start[1], true, idct->destination->height0);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   matrix_mul(shader, fragment, tc, sampler, start, step, fetch4, STAGE1_SCALE);
+   tmp = matrix_mul(shader, m);
+   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE1_SCALE));
+
+   ureg_release_temporary(shader, tmp);
+   ureg_release_temporary(shader, m[0][0]);
+   ureg_release_temporary(shader, m[0][1]);
+   ureg_release_temporary(shader, m[1][0]);
+   ureg_release_temporary(shader, m[1][1]);
 
    ureg_END(shader);
 
@@ -236,10 +280,12 @@ static void *
 create_matrix_frag_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
+
    struct ureg_src tc[2], sampler[2];
-   struct ureg_src start[2], step[2];
-   struct ureg_dst fragment;
-   bool fetch4[2];
+   struct ureg_src start[2];
+
+   struct ureg_dst m[2][2];
+   struct ureg_dst tmp, fragment;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -248,21 +294,25 @@ create_matrix_frag_shader(struct vl_idct *idct)
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
 
-   start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
-   start[1] = ureg_imm1f(shader, 0.0f);
-
-   step[0] = ureg_imm1f(shader, 1.0f / idct->destination->width0);
-   step[1] = ureg_imm1f(shader, 4.0f / BLOCK_WIDTH);
-
    sampler[0] = ureg_DECL_sampler(shader, 1);
    sampler[1] = ureg_DECL_sampler(shader, 0);
 
-   fetch4[0] = false;
-   fetch4[1] = true;
+   start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
+   start[1] = ureg_imm1f(shader, 0.0f);
+
+   fetch_one(shader, m[0], tc[0], sampler[0], start[0], false, idct->destination->width0);
+   fetch_four(shader, m[1], tc[1], sampler[1], start[1], true, BLOCK_HEIGHT);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   matrix_mul(shader, fragment, tc, sampler, start, step, fetch4, STAGE2_SCALE);
+   tmp = matrix_mul(shader, m);
+   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
+
+   ureg_release_temporary(shader, tmp);
+   ureg_release_temporary(shader, m[0][0]);
+   ureg_release_temporary(shader, m[0][1]);
+   ureg_release_temporary(shader, m[1][0]);
+   ureg_release_temporary(shader, m[1][1]);
 
    ureg_END(shader);
 
-- 
cgit v1.2.3


From a981d62c9781cd204aaec643b0f6115496f01789 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 26 Nov 2010 21:44:22 +0100
Subject: r600g: disable hardware blit for stream texture

---
 src/gallium/drivers/r600/r600_texture.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index ec7bd0d75cd..65d6acb9e4c 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -575,7 +575,8 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
                 use_staging_texture = TRUE;
 
         if (!permit_hardware_blit(ctx->screen, texture) ||
-            (texture->flags & R600_RESOURCE_FLAG_TRANSFER))
+            (texture->flags & R600_RESOURCE_FLAG_TRANSFER) ||
+            (texture->usage == PIPE_USAGE_STREAM))
                 use_staging_texture = FALSE;
 
 	trans = CALLOC_STRUCT(r600_transfer);
-- 
cgit v1.2.3


From 13e28cff7655adec0f89aed9c5ee74f8481133ab Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 26 Nov 2010 21:50:14 +0100
Subject: [g3dvl] use four component fetch also for idct source

---
 src/gallium/auxiliary/vl/vl_idct.c | 19 ++++++++++---------
 src/gallium/auxiliary/vl/vl_idct.h | 12 ++++++------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 9efe4326926..99887277a3f 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -263,7 +263,7 @@ create_transpose_frag_shader(struct vl_idct *idct)
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    tmp = matrix_mul(shader, m);
-   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE1_SCALE));
+   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
 
    ureg_release_temporary(shader, tmp);
    ureg_release_temporary(shader, m[0][0]);
@@ -300,13 +300,13 @@ create_matrix_frag_shader(struct vl_idct *idct)
    start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
    start[1] = ureg_imm1f(shader, 0.0f);
 
-   fetch_one(shader, m[0], tc[0], sampler[0], start[0], false, idct->destination->width0);
+   fetch_four(shader, m[0], tc[0], sampler[0], start[0], false, idct->destination->width0);
    fetch_four(shader, m[1], tc[1], sampler[1], start[1], true, BLOCK_HEIGHT);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    tmp = matrix_mul(shader, m);
-   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
+   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE1_SCALE));
 
    ureg_release_temporary(shader, tmp);
    ureg_release_temporary(shader, m[0][0]);
@@ -425,13 +425,14 @@ init_buffers(struct vl_idct *idct)
    template.bind = PIPE_BIND_SAMPLER_VIEW;
    template.flags = 0;
 
-   template.format = idct->destination->format;
+   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
    template.width0 = idct->destination->width0;
    template.height0 = idct->destination->height0;
    template.depth0 = idct->destination->depth0;
-   template.usage = PIPE_USAGE_DYNAMIC;
+   template.usage = PIPE_USAGE_STREAM;
    idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
+   template.format = idct->destination->format;
    template.usage = PIPE_USAGE_STATIC;
    idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
@@ -689,10 +690,10 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 
    if(block) {
       tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format);
-      texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
+      texels = idct->texels + (y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH) * 4;
 
       for (i = 0; i < BLOCK_HEIGHT; ++i)
-         memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
+         memcpy(texels + i * tex_pitch * 4, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
 
       /* non empty blocks fills the vector buffer from left to right */
       v_dst = idct->vectors + idct->num_blocks * 4;
@@ -732,7 +733,7 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
       idct->pipe->bind_vs_state(idct->pipe, idct->vs);
-      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
+      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
 
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
 
@@ -746,7 +747,7 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
       idct->pipe->bind_vs_state(idct->pipe, idct->vs);
-      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
+      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
 
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
    }
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index da9bfef6fe5..084ac36d3e1 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -48,8 +48,8 @@ struct vl_idct
       void *all[4];
       void *stage[2][2];
       struct {
-         void *transpose, *source;
-         void *matrix, *intermediate;
+         void *matrix, *source;
+         void *transpose, *intermediate;
       } individual;
    } samplers;
 
@@ -58,8 +58,8 @@ struct vl_idct
       struct pipe_sampler_view *all[4];
       struct pipe_sampler_view *stage[2][2];
       struct {
-         struct pipe_sampler_view *transpose, *source;
-         struct pipe_sampler_view *matrix, *intermediate;
+         struct pipe_sampler_view *matrix, *source;
+         struct pipe_sampler_view *transpose, *intermediate;
       } individual;
    } sampler_views;
 
@@ -71,8 +71,8 @@ struct vl_idct
       struct pipe_resource *all[4];
       struct pipe_resource *stage[2][2];
       struct {
-         struct pipe_resource *transpose, *source;
-         struct pipe_resource *matrix, *intermediate;
+         struct pipe_resource *matrix, *source;
+         struct pipe_resource *transpose, *intermediate;
       } individual;
    } textures;
 
-- 
cgit v1.2.3


From 027704db75a61300e733b0f5a9efbb491189dce5 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 27 Nov 2010 11:24:24 +0100
Subject: [g3dvl] give idct stage 1 & 2 its own sb_state and viewport

---
 src/gallium/auxiliary/vl/vl_idct.c | 48 +++++++++++++++++++++-----------------
 src/gallium/auxiliary/vl/vl_idct.h |  4 ++--
 2 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 99887277a3f..79cd8961573 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -432,7 +432,6 @@ init_buffers(struct vl_idct *idct)
    template.usage = PIPE_USAGE_STREAM;
    idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
-   template.format = idct->destination->format;
    template.usage = PIPE_USAGE_STATIC;
    idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
@@ -534,19 +533,25 @@ init_state(struct vl_idct *idct)
    idct->num_blocks = 0;
    idct->num_empty_blocks = 0;
 
-   idct->viewport.scale[0] = idct->destination->width0;
-   idct->viewport.scale[1] = idct->destination->height0;
-   idct->viewport.scale[2] = 1;
-   idct->viewport.scale[3] = 1;
-   idct->viewport.translate[0] = 0;
-   idct->viewport.translate[1] = 0;
-   idct->viewport.translate[2] = 0;
-   idct->viewport.translate[3] = 0;
+   idct->viewport[0].scale[0] = idct->destination->width0;
+   idct->viewport[1].scale[0] = idct->destination->width0;
 
-   idct->fb_state.width = idct->destination->width0;
-   idct->fb_state.height = idct->destination->height0;
-   idct->fb_state.nr_cbufs = 1;
-   idct->fb_state.zsbuf = NULL;
+   idct->fb_state[0].width = idct->destination->width0;
+   idct->fb_state[1].width = idct->destination->width0;
+
+   for(i = 0; i < 2; ++i) {
+      idct->viewport[i].scale[1] = idct->destination->height0;
+      idct->viewport[i].scale[2] = 1;
+      idct->viewport[i].scale[3] = 1;
+      idct->viewport[i].translate[0] = 0;
+      idct->viewport[i].translate[1] = 0;
+      idct->viewport[i].translate[2] = 0;
+      idct->viewport[i].translate[3] = 0;
+
+      idct->fb_state[i].height = idct->destination->height0;
+      idct->fb_state[i].nr_cbufs = 1;
+      idct->fb_state[i].zsbuf = NULL;
+   }
 
    for (i = 0; i < 4; ++i) {
       memset(&sampler, 0, sizeof(sampler));
@@ -651,10 +656,12 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
    idct->surfaces.intermediate = idct->pipe->screen->get_tex_surface(
       idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, 0,
       PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+   idct->fb_state[0].cbufs[0] = idct->surfaces.intermediate;
 
    idct->surfaces.destination = idct->pipe->screen->get_tex_surface(
       idct->pipe->screen, idct->destination, 0, 0, 0,
       PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+   idct->fb_state[1].cbufs[0] = idct->surfaces.destination;
 
    init_constants(idct);
    xfer_buffers_map(idct);
@@ -724,9 +731,8 @@ vl_idct_flush(struct vl_idct *idct)
    if(idct->num_blocks > 0) {
 
       /* first stage */
-      idct->fb_state.cbufs[0] = idct->surfaces.intermediate;
-      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
-      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
+      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
+      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[0]);
 
       idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
@@ -738,9 +744,8 @@ vl_idct_flush(struct vl_idct *idct)
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
 
       /* second stage */
-      idct->fb_state.cbufs[0] = idct->surfaces.destination;
-      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
-      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
+      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
+      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);
 
       idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
@@ -755,9 +760,8 @@ vl_idct_flush(struct vl_idct *idct)
    if(idct->num_empty_blocks > 0) {
 
       /* empty block handling */
-      idct->fb_state.cbufs[0] = idct->surfaces.destination;
-      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
-      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
+      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
+      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);
 
       idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 084ac36d3e1..037bfe27070 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -36,8 +36,8 @@ struct vl_idct
 
    unsigned max_blocks;
 
-   struct pipe_viewport_state viewport;
-   struct pipe_framebuffer_state fb_state;
+   struct pipe_viewport_state viewport[2];
+   struct pipe_framebuffer_state fb_state[2];
 
    struct pipe_resource *destination;
 
-- 
cgit v1.2.3


From 12836fbcfad7f317b1f5aa5e46f9946894bf040c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 27 Nov 2010 14:01:01 +0100
Subject: [g3dvl] individual vs for each stage and a bunch of bugfixes

---
 src/gallium/auxiliary/vl/vl_idct.c | 168 ++++++++++++++++++++++---------------
 src/gallium/auxiliary/vl/vl_idct.h |   4 +-
 2 files changed, 104 insertions(+), 68 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 79cd8961573..25dfac68d3c 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -82,7 +82,7 @@ static const struct vertex2f const_quad[4] = {
 };
 
 static void *
-create_vert_shader(struct vl_idct *idct)
+create_vert_shader(struct vl_idct *idct, bool calc_src_cords, struct pipe_resource *dst)
 {
    struct ureg_program *shader;
    struct ureg_src scale;
@@ -94,21 +94,16 @@ create_vert_shader(struct vl_idct *idct)
    if (!shader)
       return NULL;
 
-   scale = ureg_imm2f(shader,
-      (float)BLOCK_WIDTH / idct->destination->width0, 
-      (float)BLOCK_HEIGHT / idct->destination->height0);
-
    t_vpos = ureg_DECL_temporary(shader);
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
-   o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
-   o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
-   o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
 
    /*
+    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
+    *
     * t_vpos = vpos + vrect
     * o_vpos.xy = t_vpos * scale
     * o_vpos.zw = vpos
@@ -118,14 +113,24 @@ create_vert_shader(struct vl_idct *idct)
     * o_start = vpos * scale
     *
     */
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / dst->width0, 
+      (float)BLOCK_HEIGHT / dst->height0);
+
    ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
    ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
-   ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
-   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
+   if(calc_src_cords) {
+      o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
+      o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
+      o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
+
+      ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
+      ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+      ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
+   }
 
    ureg_release_temporary(shader, t_vpos);
 
@@ -179,7 +184,7 @@ fetch_one(struct ureg_program *shader, struct ureg_dst m[2],
 static void
 fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
            struct ureg_src tc, struct ureg_src sampler,
-           struct ureg_src start, bool right_side, float size)
+           struct ureg_src start, bool right_side, float width)
 {
    struct ureg_dst t_tc;
 
@@ -201,19 +206,18 @@ fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
    }
 
    ureg_TEX(shader, m[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 4.0f / size));
+   ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / width));
    ureg_TEX(shader, m[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
 
    ureg_release_temporary(shader, t_tc);
 }
 
-static struct ureg_dst
-matrix_mul(struct ureg_program *shader, struct ureg_dst m[2][2])
+static void
+matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
 {
-   struct ureg_dst dst, tmp[2];
+   struct ureg_dst tmp[2];
    unsigned i;
 
-   dst = ureg_DECL_temporary(shader);
    for(i = 0; i < 2; ++i) {
       tmp[i] = ureg_DECL_temporary(shader);
    }
@@ -222,20 +226,21 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst m[2][2])
     * tmp[0..1] = dot4(m[0][0..1], m[1][0..1])
     * dst = tmp[0] + tmp[1]
     */
-   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(m[0][0]), ureg_src(m[1][0]));
-   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(m[0][1]), ureg_src(m[1][1]));
-   ureg_ADD(shader, ureg_writemask(dst, TGSI_WRITEMASK_X), ureg_src(tmp[0]), ureg_src(tmp[1]));
+   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
+   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(l[1]), ureg_src(r[1]));
+   ureg_ADD(shader, dst, ureg_src(tmp[0]), ureg_src(tmp[1]));
 
    for(i = 0; i < 2; ++i) {
       ureg_release_temporary(shader, tmp[i]);
    }
-
-   return dst;
 }
 
 static void *
 create_transpose_frag_shader(struct vl_idct *idct)
 {
+   struct pipe_resource *transpose = idct->textures.individual.transpose;
+   struct pipe_resource *intermediate = idct->textures.individual.intermediate;
+
    struct ureg_program *shader;
 
    struct ureg_src tc[2], sampler[2];
@@ -257,12 +262,13 @@ create_transpose_frag_shader(struct vl_idct *idct)
    start[0] = ureg_imm1f(shader, 0.0f);
    start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
-   fetch_four(shader, m[0], tc[0], sampler[0], start[0], false, BLOCK_WIDTH);
-   fetch_one(shader, m[1], tc[1], sampler[1], start[1], true, idct->destination->height0);
+   fetch_four(shader, m[0], tc[0], sampler[0], start[0], false, transpose->width0);
+   fetch_one(shader, m[1], tc[1], sampler[1], start[1], true, intermediate->height0);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   tmp = matrix_mul(shader, m);
+   tmp = ureg_DECL_temporary(shader);
+   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), m[0], m[1]);
    ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
 
    ureg_release_temporary(shader, tmp);
@@ -279,18 +285,26 @@ create_transpose_frag_shader(struct vl_idct *idct)
 static void *
 create_matrix_frag_shader(struct vl_idct *idct)
 {
+   struct pipe_resource *matrix = idct->textures.individual.matrix;
+   struct pipe_resource *source = idct->textures.individual.source;
+
    struct ureg_program *shader;
 
    struct ureg_src tc[2], sampler[2];
    struct ureg_src start[2];
 
-   struct ureg_dst m[2][2];
-   struct ureg_dst tmp, fragment;
+   struct ureg_dst l[2], r[2];
+   struct ureg_dst t_tc, tmp, fragment;
+
+   //unsigned i;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return NULL;
 
+   t_tc = ureg_DECL_temporary(shader);
+   tmp = ureg_DECL_temporary(shader);
+
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
 
@@ -300,19 +314,29 @@ create_matrix_frag_shader(struct vl_idct *idct)
    start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
    start[1] = ureg_imm1f(shader, 0.0f);
 
-   fetch_four(shader, m[0], tc[0], sampler[0], start[0], false, idct->destination->width0);
-   fetch_four(shader, m[1], tc[1], sampler[1], start[1], true, BLOCK_HEIGHT);
-
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   tmp = matrix_mul(shader, m);
-   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE1_SCALE));
+   fetch_four(shader, r, tc[1], sampler[1], start[1], true, matrix->width0);
+
+   ureg_MOV(shader, t_tc, tc[0]);
+   //for (i = 0; i < 4; ++i) {
+
+      fetch_four(shader, l, ureg_src(t_tc), sampler[0], start[0], false, source->width0);
+      matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X /* << i */), l, r);
+      ureg_release_temporary(shader, l[0]);
+      ureg_release_temporary(shader, l[1]);
 
+   //   if (i != 3)
+   //      ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y),
+   //         ureg_src(t_tc), ureg_imm1f(shader, 1.0f / source->height0));
+   //}
+
+   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
+
+   ureg_release_temporary(shader, t_tc);
    ureg_release_temporary(shader, tmp);
-   ureg_release_temporary(shader, m[0][0]);
-   ureg_release_temporary(shader, m[0][1]);
-   ureg_release_temporary(shader, m[1][0]);
-   ureg_release_temporary(shader, m[1][1]);
+   ureg_release_temporary(shader, r[0]);
+   ureg_release_temporary(shader, r[1]);
 
    ureg_END(shader);
 
@@ -344,8 +368,8 @@ xfer_buffers_map(struct vl_idct *idct)
    struct pipe_box rect =
    {
       0, 0, 0,
-      idct->destination->width0,
-      idct->destination->height0,
+      idct->textures.individual.source->width0,
+      idct->textures.individual.source->height0,
       1
    };
 
@@ -380,24 +404,31 @@ xfer_buffers_unmap(struct vl_idct *idct)
 static bool
 init_shaders(struct vl_idct *idct)
 {
-   idct->vs = create_vert_shader(idct);
-   idct->transpose_fs = create_transpose_frag_shader(idct);
+   idct->matrix_vs = create_vert_shader(idct, true, idct->textures.individual.intermediate);
    idct->matrix_fs = create_matrix_frag_shader(idct);
+
+   idct->transpose_vs = create_vert_shader(idct, true, idct->destination);
+   idct->transpose_fs = create_transpose_frag_shader(idct);
+
+   idct->eb_vs = create_vert_shader(idct, false, idct->destination);
    idct->eb_fs = create_empty_block_frag_shader(idct);
 
    return 
-      idct->vs != NULL &&
-      idct->transpose_fs != NULL &&
-      idct->matrix_fs != NULL &&
-      idct->eb_fs != NULL;
+      idct->transpose_vs != NULL && idct->transpose_fs != NULL &&
+      idct->matrix_vs != NULL && idct->matrix_fs != NULL &&
+      idct->eb_vs != NULL && idct->eb_fs != NULL;
 }
 
 static void
 cleanup_shaders(struct vl_idct *idct)
 {
-   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
+   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
+
+   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
+
+   idct->pipe->delete_vs_state(idct->pipe, idct->eb_vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->eb_fs);
 }
 
@@ -416,22 +447,20 @@ init_buffers(struct vl_idct *idct)
 
    memset(&template, 0, sizeof(struct pipe_resource));
    template.target = PIPE_TEXTURE_2D;
-   template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
    template.last_level = 0;
-   template.width0 = 2;
-   template.height0 = 8;
    template.depth0 = 1;
-   template.usage = PIPE_USAGE_IMMUTABLE;
    template.bind = PIPE_BIND_SAMPLER_VIEW;
    template.flags = 0;
+   template.depth0 = 1;
 
    template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
-   template.width0 = idct->destination->width0;
+   template.width0 = idct->destination->width0 / 4;
    template.height0 = idct->destination->height0;
-   template.depth0 = idct->destination->depth0;
    template.usage = PIPE_USAGE_STREAM;
    idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
+   template.format = PIPE_FORMAT_R16_SNORM;
+   template.width0 = idct->destination->width0;
    template.usage = PIPE_USAGE_STATIC;
    idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
@@ -533,14 +562,19 @@ init_state(struct vl_idct *idct)
    idct->num_blocks = 0;
    idct->num_empty_blocks = 0;
 
-   idct->viewport[0].scale[0] = idct->destination->width0;
+   idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0;
+   idct->viewport[0].scale[1] = idct->textures.individual.intermediate->height0;
+
    idct->viewport[1].scale[0] = idct->destination->width0;
+   idct->viewport[1].scale[1] = idct->destination->height0;
+
+   idct->fb_state[0].width = idct->textures.individual.intermediate->width0;
+   idct->fb_state[0].height = idct->textures.individual.intermediate->height0;
 
-   idct->fb_state[0].width = idct->destination->width0;
    idct->fb_state[1].width = idct->destination->width0;
+   idct->fb_state[1].height = idct->destination->height0;
 
    for(i = 0; i < 2; ++i) {
-      idct->viewport[i].scale[1] = idct->destination->height0;
       idct->viewport[i].scale[2] = 1;
       idct->viewport[i].scale[3] = 1;
       idct->viewport[i].translate[0] = 0;
@@ -548,7 +582,6 @@ init_state(struct vl_idct *idct)
       idct->viewport[i].translate[2] = 0;
       idct->viewport[i].translate[3] = 0;
 
-      idct->fb_state[i].height = idct->destination->height0;
       idct->fb_state[i].nr_cbufs = 1;
       idct->fb_state[i].zsbuf = NULL;
    }
@@ -643,16 +676,16 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
    pipe_resource_reference(&idct->textures.individual.transpose, matrix);
    pipe_resource_reference(&idct->destination, dst);
 
-   init_state(idct);
-
-   if(!init_shaders(idct))
+   if(!init_buffers(idct))
       return false;
 
-   if(!init_buffers(idct)) {
-      cleanup_shaders(idct);
+   if(!init_shaders(idct)) {
+      cleanup_buffers(idct);
       return false;
    }
 
+   init_state(idct);
+
    idct->surfaces.intermediate = idct->pipe->screen->get_tex_surface(
       idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, 0,
       PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
@@ -689,6 +722,7 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
    struct vertex2f v, *v_dst;
 
    unsigned tex_pitch;
+   unsigned nr_components;
    short *texels;
 
    unsigned i;
@@ -696,11 +730,13 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
    assert(idct);
 
    if(block) {
-      tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format);
-      texels = idct->texels + (y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH) * 4;
+      nr_components = util_format_get_nr_components(idct->tex_transfer->resource->format);
+      
+      tex_pitch = idct->tex_transfer->stride / sizeof(short);
+      texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
 
       for (i = 0; i < BLOCK_HEIGHT; ++i)
-         memcpy(texels + i * tex_pitch * 4, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
+         memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
 
       /* non empty blocks fills the vector buffer from left to right */
       v_dst = idct->vectors + idct->num_blocks * 4;
@@ -738,7 +774,7 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
-      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+      idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
 
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
@@ -751,7 +787,7 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
-      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+      idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
 
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
@@ -767,7 +803,7 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
       idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
-      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+      idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);
 
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS,
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 037bfe27070..5c242bd0602 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -63,8 +63,8 @@ struct vl_idct
       } individual;
    } sampler_views;
 
-   void *vs;
-   void *transpose_fs, *matrix_fs, *eb_fs;
+   void *matrix_vs, *transpose_vs, *eb_vs;
+   void *matrix_fs, *transpose_fs, *eb_fs;
 
    union
    {
-- 
cgit v1.2.3


From 3fd53e6c2a05e65872de4292557d7839cbcf7395 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 27 Nov 2010 14:08:15 +0100
Subject: [g3dvl] some more bugfixing

---
 src/gallium/auxiliary/vl/vl_idct.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 25dfac68d3c..be3e0c59ea7 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -653,12 +653,12 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
    );
-   pitch = buf_transfer->stride / util_format_get_blocksize(buf_transfer->resource->format);
+   pitch = buf_transfer->stride / sizeof(float);
 
    f = pipe->transfer_map(pipe, buf_transfer);
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
-         f[i * pitch * 4 + j] = const_matrix[j][i]; // transpose
+         f[i * pitch + j] = const_matrix[j][i]; // transpose
 
    pipe->transfer_unmap(pipe, buf_transfer);
    pipe->transfer_destroy(pipe, buf_transfer);
@@ -801,8 +801,6 @@ vl_idct_flush(struct vl_idct *idct)
 
       idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-      idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
-      idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
       idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);
 
-- 
cgit v1.2.3


From e742a1043dbd56fe11f0490cb74b7a738bab2238 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 27 Nov 2010 18:20:38 +0100
Subject: [g3dvl] use 8 zslices for idct

---
 src/gallium/auxiliary/vl/vl_idct.c | 121 ++++++++++++++++++-------------------
 src/gallium/auxiliary/vl/vl_idct.h |   4 --
 2 files changed, 60 insertions(+), 65 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index be3e0c59ea7..00aa829f6d2 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -82,7 +82,7 @@ static const struct vertex2f const_quad[4] = {
 };
 
 static void *
-create_vert_shader(struct vl_idct *idct, bool calc_src_cords, struct pipe_resource *dst)
+create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
 {
    struct ureg_program *shader;
    struct ureg_src scale;
@@ -114,8 +114,8 @@ create_vert_shader(struct vl_idct *idct, bool calc_src_cords, struct pipe_resour
     *
     */
    scale = ureg_imm2f(shader,
-      (float)BLOCK_WIDTH / dst->width0, 
-      (float)BLOCK_HEIGHT / dst->height0);
+      (float)BLOCK_WIDTH / idct->destination->width0, 
+      (float)BLOCK_HEIGHT / idct->destination->height0);
 
    ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
    ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
@@ -142,7 +142,7 @@ create_vert_shader(struct vl_idct *idct, bool calc_src_cords, struct pipe_resour
 static void
 fetch_one(struct ureg_program *shader, struct ureg_dst m[2],
           struct ureg_src tc, struct ureg_src sampler,
-          struct ureg_src start, bool right_side, float size)
+          struct ureg_src start, struct ureg_src block, float height)
 {
    struct ureg_dst t_tc, tmp;
    unsigned i, j;
@@ -158,22 +158,19 @@ fetch_one(struct ureg_program *shader, struct ureg_dst m[2],
     * t_tc.y = right_side ? tc.y : start.y
     * m[0..1].xyzw = tex(t_tc++, sampler)
     */
-   if(right_side) {
-      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(tc, TGSI_SWIZZLE_X));
-      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(start, TGSI_SWIZZLE_Y));
-   } else {
-      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_X));
-      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y));
-   }
+   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(tc, TGSI_SWIZZLE_X));
+   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(start, TGSI_SWIZZLE_Y));
+   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X));
+
    for(i = 0; i < 2; ++i) {
       for(j = 0; j < 4; ++j) {
          /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
-         ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
+         ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
          ureg_MOV(shader, ureg_writemask(m[i], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
 
          if(i != 1 || j != 3) /* skip the last add */
-            ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X << right_side),
-               ureg_src(t_tc), ureg_imm1f(shader, 1.0f / size));
+            ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y),
+               ureg_src(t_tc), ureg_imm1f(shader, 1.0f / height));
       }
    }
 
@@ -243,7 +240,7 @@ create_transpose_frag_shader(struct vl_idct *idct)
 
    struct ureg_program *shader;
 
-   struct ureg_src tc[2], sampler[2];
+   struct ureg_src block, tex, sampler[2];
    struct ureg_src start[2];
 
    struct ureg_dst m[2][2];
@@ -253,8 +250,8 @@ create_transpose_frag_shader(struct vl_idct *idct)
    if (!shader)
       return NULL;
 
-   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
-   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
+   block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
+   tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_CONSTANT);
 
    sampler[0] = ureg_DECL_sampler(shader, 0);
    sampler[1] = ureg_DECL_sampler(shader, 1);
@@ -262,8 +259,8 @@ create_transpose_frag_shader(struct vl_idct *idct)
    start[0] = ureg_imm1f(shader, 0.0f);
    start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
-   fetch_four(shader, m[0], tc[0], sampler[0], start[0], false, transpose->width0);
-   fetch_one(shader, m[1], tc[1], sampler[1], start[1], true, intermediate->height0);
+   fetch_four(shader, m[0], block, sampler[0], start[0], false, transpose->width0);
+   fetch_one(shader, m[1], tex, sampler[1], start[1], block, intermediate->height0);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -294,9 +291,9 @@ create_matrix_frag_shader(struct vl_idct *idct)
    struct ureg_src start[2];
 
    struct ureg_dst l[2], r[2];
-   struct ureg_dst t_tc, tmp, fragment;
+   struct ureg_dst t_tc, tmp, fragment[BLOCK_WIDTH];
 
-   //unsigned i;
+   unsigned i;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -314,29 +311,25 @@ create_matrix_frag_shader(struct vl_idct *idct)
    start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
    start[1] = ureg_imm1f(shader, 0.0f);
 
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
-   fetch_four(shader, r, tc[1], sampler[1], start[1], true, matrix->width0);
-
-   ureg_MOV(shader, t_tc, tc[0]);
-   //for (i = 0; i < 4; ++i) {
-
-      fetch_four(shader, l, ureg_src(t_tc), sampler[0], start[0], false, source->width0);
-      matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X /* << i */), l, r);
-      ureg_release_temporary(shader, l[0]);
-      ureg_release_temporary(shader, l[1]);
-
-   //   if (i != 3)
-   //      ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y),
-   //         ureg_src(t_tc), ureg_imm1f(shader, 1.0f / source->height0));
-   //}
-
-   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
+   for (i = 0; i < BLOCK_WIDTH; ++i)
+       fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
+
+   fetch_four(shader, l, tc[0], sampler[0], start[0], false, source->width0);
+   ureg_MUL(shader, l[0], ureg_src(l[0]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
+   ureg_MUL(shader, l[1], ureg_src(l[1]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
+   
+   for (i = 0; i < BLOCK_WIDTH; ++i) {
+      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i));
+      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, matrix->width0);
+      matrix_mul(shader, fragment[i], l, r);
+      ureg_release_temporary(shader, r[0]);
+      ureg_release_temporary(shader, r[1]);
+   }
 
    ureg_release_temporary(shader, t_tc);
    ureg_release_temporary(shader, tmp);
-   ureg_release_temporary(shader, r[0]);
-   ureg_release_temporary(shader, r[1]);
+   ureg_release_temporary(shader, l[0]);
+   ureg_release_temporary(shader, l[1]);
 
    ureg_END(shader);
 
@@ -404,13 +397,13 @@ xfer_buffers_unmap(struct vl_idct *idct)
 static bool
 init_shaders(struct vl_idct *idct)
 {
-   idct->matrix_vs = create_vert_shader(idct, true, idct->textures.individual.intermediate);
+   idct->matrix_vs = create_vert_shader(idct, true);
    idct->matrix_fs = create_matrix_frag_shader(idct);
 
-   idct->transpose_vs = create_vert_shader(idct, true, idct->destination);
+   idct->transpose_vs = create_vert_shader(idct, true);
    idct->transpose_fs = create_transpose_frag_shader(idct);
 
-   idct->eb_vs = create_vert_shader(idct, false, idct->destination);
+   idct->eb_vs = create_vert_shader(idct, false);
    idct->eb_fs = create_empty_block_frag_shader(idct);
 
    return 
@@ -446,21 +439,23 @@ init_buffers(struct vl_idct *idct)
       idct->destination->depth0;
 
    memset(&template, 0, sizeof(struct pipe_resource));
-   template.target = PIPE_TEXTURE_2D;
    template.last_level = 0;
    template.depth0 = 1;
    template.bind = PIPE_BIND_SAMPLER_VIEW;
    template.flags = 0;
-   template.depth0 = 1;
 
+   template.target = PIPE_TEXTURE_2D;
    template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
    template.width0 = idct->destination->width0 / 4;
    template.height0 = idct->destination->height0;
+   template.depth0 = 1;
    template.usage = PIPE_USAGE_STREAM;
    idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
+   template.target = PIPE_TEXTURE_3D;
    template.format = PIPE_FORMAT_R16_SNORM;
-   template.width0 = idct->destination->width0;
+   template.width0 = idct->destination->width0 / 8;
+   template.depth0 = 8;
    template.usage = PIPE_USAGE_STATIC;
    idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
@@ -571,9 +566,21 @@ init_state(struct vl_idct *idct)
    idct->fb_state[0].width = idct->textures.individual.intermediate->width0;
    idct->fb_state[0].height = idct->textures.individual.intermediate->height0;
 
+   idct->fb_state[0].nr_cbufs = 8;
+   for(i = 0; i < 8; ++i) {
+      idct->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface(
+         idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, i,
+         PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+   }
+
    idct->fb_state[1].width = idct->destination->width0;
    idct->fb_state[1].height = idct->destination->height0;
 
+   idct->fb_state[1].nr_cbufs = 1;
+   idct->fb_state[1].cbufs[0] = idct->pipe->screen->get_tex_surface(
+      idct->pipe->screen, idct->destination, 0, 0, 0,
+      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+
    for(i = 0; i < 2; ++i) {
       idct->viewport[i].scale[2] = 1;
       idct->viewport[i].scale[3] = 1;
@@ -582,7 +589,6 @@ init_state(struct vl_idct *idct)
       idct->viewport[i].translate[2] = 0;
       idct->viewport[i].translate[3] = 0;
 
-      idct->fb_state[i].nr_cbufs = 1;
       idct->fb_state[i].zsbuf = NULL;
    }
 
@@ -612,6 +618,12 @@ cleanup_state(struct vl_idct *idct)
 {
    unsigned i;
 
+   for(i = 0; i < 8; ++i) {
+      idct->pipe->screen->tex_surface_destroy(idct->fb_state[0].cbufs[i]);
+   }
+
+   idct->pipe->screen->tex_surface_destroy(idct->fb_state[1].cbufs[0]);
+
    for (i = 0; i < 4; ++i)
       idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
 }
@@ -686,16 +698,6 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
 
    init_state(idct);
 
-   idct->surfaces.intermediate = idct->pipe->screen->get_tex_surface(
-      idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, 0,
-      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
-   idct->fb_state[0].cbufs[0] = idct->surfaces.intermediate;
-
-   idct->surfaces.destination = idct->pipe->screen->get_tex_surface(
-      idct->pipe->screen, idct->destination, 0, 0, 0,
-      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
-   idct->fb_state[1].cbufs[0] = idct->surfaces.destination;
-
    init_constants(idct);
    xfer_buffers_map(idct);
 
@@ -705,9 +707,6 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
 void
 vl_idct_cleanup(struct vl_idct *idct)
 {
-   idct->pipe->screen->tex_surface_destroy(idct->surfaces.destination);
-   idct->pipe->screen->tex_surface_destroy(idct->surfaces.intermediate);
-
    cleanup_shaders(idct);
    cleanup_buffers(idct);
 
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 5c242bd0602..7234382196f 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -90,10 +90,6 @@ struct vl_idct
 
    struct pipe_transfer *vec_transfer;
    struct vertex2f *vectors;
-
-   struct {
-      struct pipe_surface *intermediate, *destination;
-   } surfaces;
 };
 
 struct pipe_resource *vl_idct_upload_matrix(struct pipe_context *pipe);
-- 
cgit v1.2.3


From 336c7735ae97ddc0a177562375136297c2de3d19 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 28 Nov 2010 01:21:41 +0100
Subject: [g3dvl] join empty blocks to get larger slices

---
 src/gallium/auxiliary/vl/vl_idct.c | 137 +++++++++++++++++++++++--------------
 src/gallium/auxiliary/vl/vl_idct.h |   6 ++
 2 files changed, 91 insertions(+), 52 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 00aa829f6d2..74adaf3dd4c 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -355,45 +355,6 @@ create_empty_block_frag_shader(struct vl_idct *idct)
    return ureg_create_shader_and_destroy(shader, idct->pipe);
 }
 
-static void
-xfer_buffers_map(struct vl_idct *idct)
-{
-   struct pipe_box rect =
-   {
-      0, 0, 0,
-      idct->textures.individual.source->width0,
-      idct->textures.individual.source->height0,
-      1
-   };
-
-   idct->tex_transfer = idct->pipe->get_transfer
-   (
-      idct->pipe, idct->textures.individual.source,
-      u_subresource(0, 0),
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &rect
-   );
-
-   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
-
-   idct->vectors = pipe_buffer_map
-   (
-      idct->pipe,
-      idct->vertex_bufs.individual.pos.buffer,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &idct->vec_transfer
-   );
-}
-
-static void
-xfer_buffers_unmap(struct vl_idct *idct)
-{
-   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, idct->vec_transfer);
-
-   idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
-   idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
-}
-
 static bool
 init_shaders(struct vl_idct *idct)
 {
@@ -678,6 +639,50 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    return matrix;
 }
 
+static void
+xfer_buffers_map(struct vl_idct *idct)
+{
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      idct->textures.individual.source->width0,
+      idct->textures.individual.source->height0,
+      1
+   };
+
+   idct->tex_transfer = idct->pipe->get_transfer
+   (
+      idct->pipe, idct->textures.individual.source,
+      u_subresource(0, 0),
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+
+   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
+
+   idct->vectors = pipe_buffer_map
+   (
+      idct->pipe,
+      idct->vertex_bufs.individual.pos.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &idct->vec_transfer
+   );
+
+   idct->next_empty_block.l_x = ~1;
+   idct->next_empty_block.l_y = ~1;
+   idct->next_empty_block.r_x = ~1;
+   idct->next_empty_block.r_y = ~1;
+}
+
+static void
+xfer_buffers_unmap(struct vl_idct *idct)
+{
+   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, idct->vec_transfer);
+
+   idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
+   idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
+}
+
 bool
 vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix)
 {
@@ -715,13 +720,44 @@ vl_idct_cleanup(struct vl_idct *idct)
    pipe_resource_reference(&idct->destination, NULL);
 }
 
+static void
+flush_empty_block(struct vl_idct *idct, unsigned new_x, unsigned new_y)
+{
+   if (idct->next_empty_block.l_x == ~1 ||
+       idct->next_empty_block.l_y == ~1) {
+   
+      idct->next_empty_block.l_x = new_x;
+      idct->next_empty_block.l_y = new_y;
+
+   } else if (idct->next_empty_block.r_x != (new_x - 1) ||
+              idct->next_empty_block.r_y != new_y) {
+
+      struct vertex2f l, r, *v_dst;
+
+      v_dst = idct->vectors + (idct->max_blocks - idct->num_empty_blocks) * 4 - 4;
+
+      l.x = idct->next_empty_block.l_x;
+      l.y = idct->next_empty_block.l_y;
+      r.x = idct->next_empty_block.r_x;
+      r.y = idct->next_empty_block.r_y;
+      v_dst[0] = v_dst[3] = l;
+      v_dst[1] = v_dst[2] = r;
+
+      idct->next_empty_block.l_x = new_x;
+      idct->next_empty_block.l_y = new_y;
+      idct->num_empty_blocks++;
+   }
+
+   idct->next_empty_block.r_x = new_x;
+   idct->next_empty_block.r_y = new_y;
+}
+
 void
 vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 {
    struct vertex2f v, *v_dst;
 
    unsigned tex_pitch;
-   unsigned nr_components;
    short *texels;
 
    unsigned i;
@@ -729,8 +765,6 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
    assert(idct);
 
    if(block) {
-      nr_components = util_format_get_nr_components(idct->tex_transfer->resource->format);
-      
       tex_pitch = idct->tex_transfer->stride / sizeof(short);
       texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
 
@@ -742,25 +776,24 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 
       idct->num_blocks++;
 
-   } else {
-
-      /* while empty blocks fills the vector buffer from right to left */
-      v_dst = idct->vectors + (idct->max_blocks - idct->num_empty_blocks) * 4 - 4;
+      v.x = x;
+      v.y = y;
 
-      idct->num_empty_blocks++;
-   }
+      for (i = 0; i < 4; ++i) {
+         v_dst[i] = v;
+      }
 
-   v.x = x;
-   v.y = y;
+   } else {
 
-   for (i = 0; i < 4; ++i) {
-      v_dst[i] = v;
+      /* while empty blocks fills the vector buffer from right to left */
+      flush_empty_block(idct, x, y);
    }
 }
 
 void
 vl_idct_flush(struct vl_idct *idct)
 {
+   flush_empty_block(idct, ~1, ~1);
    xfer_buffers_unmap(idct);
 
    if(idct->num_blocks > 0) {
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 7234382196f..c26f5cb8c09 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -83,6 +83,12 @@ struct vl_idct
    } vertex_bufs;
 
    unsigned num_blocks;
+
+   struct
+   {
+      unsigned l_x, l_y, r_x, r_y;
+   } next_empty_block;
+
    unsigned num_empty_blocks;
 
    struct pipe_transfer *tex_transfer;
-- 
cgit v1.2.3


From a984c67b316ac2ca9aaf6d38a3127cf3d61a249e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 28 Nov 2010 12:23:36 +0100
Subject: make nr of render targets configureable for testing

---
 src/gallium/auxiliary/vl/vl_idct.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 74adaf3dd4c..2466d5b751b 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -44,6 +44,8 @@
 #define STAGE1_SCALE 4.0f
 #define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE)
 
+#define NR_RENDER_TARGETS 1
+
 struct vertex_shader_consts
 {
    struct vertex4f norm;
@@ -160,7 +162,12 @@ fetch_one(struct ureg_program *shader, struct ureg_dst m[2],
     */
    ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(tc, TGSI_SWIZZLE_X));
    ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(start, TGSI_SWIZZLE_Y));
+
+#if NR_RENDER_TARGETS == 8
    ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X));
+#else
+   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
+#endif
 
    for(i = 0; i < 2; ++i) {
       for(j = 0; j < 4; ++j) {
@@ -291,7 +298,7 @@ create_matrix_frag_shader(struct vl_idct *idct)
    struct ureg_src start[2];
 
    struct ureg_dst l[2], r[2];
-   struct ureg_dst t_tc, tmp, fragment[BLOCK_WIDTH];
+   struct ureg_dst t_tc, tmp, fragment[NR_RENDER_TARGETS];
 
    unsigned i;
 
@@ -311,16 +318,24 @@ create_matrix_frag_shader(struct vl_idct *idct)
    start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
    start[1] = ureg_imm1f(shader, 0.0f);
 
-   for (i = 0; i < BLOCK_WIDTH; ++i)
+   for (i = 0; i < NR_RENDER_TARGETS; ++i)
        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
 
    fetch_four(shader, l, tc[0], sampler[0], start[0], false, source->width0);
    ureg_MUL(shader, l[0], ureg_src(l[0]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
    ureg_MUL(shader, l[1], ureg_src(l[1]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
    
-   for (i = 0; i < BLOCK_WIDTH; ++i) {
+   for (i = 0; i < NR_RENDER_TARGETS; ++i) {
+
+#if NR_RENDER_TARGETS == 8
       ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i));
       fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, matrix->width0);
+#elif NR_RENDER_TARGETS == 1
+      fetch_four(shader, r, tc[1], sampler[1], start[1], true, matrix->width0);
+#else
+#error invalid number of render targets
+#endif
+
       matrix_mul(shader, fragment[i], l, r);
       ureg_release_temporary(shader, r[0]);
       ureg_release_temporary(shader, r[1]);
@@ -415,8 +430,8 @@ init_buffers(struct vl_idct *idct)
 
    template.target = PIPE_TEXTURE_3D;
    template.format = PIPE_FORMAT_R16_SNORM;
-   template.width0 = idct->destination->width0 / 8;
-   template.depth0 = 8;
+   template.width0 = idct->destination->width0 / NR_RENDER_TARGETS;
+   template.depth0 = NR_RENDER_TARGETS;
    template.usage = PIPE_USAGE_STATIC;
    idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
@@ -527,8 +542,8 @@ init_state(struct vl_idct *idct)
    idct->fb_state[0].width = idct->textures.individual.intermediate->width0;
    idct->fb_state[0].height = idct->textures.individual.intermediate->height0;
 
-   idct->fb_state[0].nr_cbufs = 8;
-   for(i = 0; i < 8; ++i) {
+   idct->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
+   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
       idct->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface(
          idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, i,
          PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
@@ -579,7 +594,7 @@ cleanup_state(struct vl_idct *idct)
 {
    unsigned i;
 
-   for(i = 0; i < 8; ++i) {
+   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
       idct->pipe->screen->tex_surface_destroy(idct->fb_state[0].cbufs[i]);
    }
 
-- 
cgit v1.2.3


From 4abe7382882a451a7750ccc451b8568768d122cb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 28 Nov 2010 14:48:31 +0100
Subject: use a shadow buffer for vertex data to optimize memory access

---
 src/gallium/auxiliary/Makefile               |   3 +-
 src/gallium/auxiliary/vl/vl_idct.c           | 165 ++++++++-------------------
 src/gallium/auxiliary/vl/vl_idct.h           |  14 +--
 src/gallium/auxiliary/vl/vl_types.h          |   5 +
 src/gallium/auxiliary/vl/vl_vertex_buffers.c | 116 +++++++++++++++++++
 src/gallium/auxiliary/vl/vl_vertex_buffers.h |  75 ++++++++++++
 6 files changed, 249 insertions(+), 129 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_vertex_buffers.c
 create mode 100644 src/gallium/auxiliary/vl/vl_vertex_buffers.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index bf90a704dd4..997478f0664 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -147,7 +147,8 @@ C_SOURCES = \
 	vl/vl_mpeg12_mc_renderer.c \
 	vl/vl_compositor.c \
 	vl/vl_csc.c \
-        vl/vl_idct.c
+        vl/vl_idct.c \
+        vl/vl_vertex_buffers.c
 
 GALLIVM_SOURCES = \
         gallivm/lp_bld_arit.c \
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 2466d5b751b..22feff8d8d4 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 #include "vl_idct.h"
+#include "vl_vertex_buffers.h"
 #include "util/u_draw.h"
 #include <assert.h>
 #include <pipe/p_context.h>
@@ -78,11 +79,6 @@ static const float const_matrix[8][8] = {
    {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f, -0.0975458f }
 };
 
-/* vertices for a quad covering a block */
-static const struct vertex2f const_quad[4] = {
-   {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
-};
-
 static void *
 create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
 {
@@ -409,11 +405,6 @@ init_buffers(struct vl_idct *idct)
    struct pipe_vertex_element vertex_elems[2];
    unsigned i;
 
-   idct->max_blocks =
-      align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
-      align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
-      idct->destination->depth0;
-
    memset(&template, 0, sizeof(struct pipe_resource));
    template.last_level = 0;
    template.depth0 = 1;
@@ -443,15 +434,7 @@ init_buffers(struct vl_idct *idct)
       idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
    }
 
-   idct->vertex_bufs.individual.quad.stride = sizeof(struct vertex2f);
-   idct->vertex_bufs.individual.quad.max_index = 4 * idct->max_blocks - 1;
-   idct->vertex_bufs.individual.quad.buffer_offset = 0;
-   idct->vertex_bufs.individual.quad.buffer = pipe_buffer_create
-   (
-      idct->pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex2f) * 4 * idct->max_blocks
-   );
+   idct->vertex_bufs.individual.quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks);
 
    if(idct->vertex_bufs.individual.quad.buffer == NULL)
       return false;
@@ -503,36 +486,12 @@ cleanup_buffers(struct vl_idct *idct)
    pipe_resource_reference(&idct->vertex_bufs.individual.pos.buffer, NULL);
 }
 
-static void
-init_constants(struct vl_idct *idct)
-{
-   struct pipe_transfer *buf_transfer;
-   struct vertex2f *v;
-
-   unsigned i;
-
-   /* quad vectors */
-   v = pipe_buffer_map
-   (
-      idct->pipe,
-      idct->vertex_bufs.individual.quad.buffer,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer
-   );
-   for ( i = 0; i < idct->max_blocks; ++i)
-     memcpy(v + i * 4, &const_quad, sizeof(const_quad));
-   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.quad.buffer, buf_transfer);
-}
-
 static void
 init_state(struct vl_idct *idct)
 {
    struct pipe_sampler_state sampler;
    unsigned i;
 
-   idct->num_blocks = 0;
-   idct->num_empty_blocks = 0;
-
    idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0;
    idct->viewport[0].scale[1] = idct->textures.individual.intermediate->height0;
 
@@ -674,26 +633,11 @@ xfer_buffers_map(struct vl_idct *idct)
    );
 
    idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
-
-   idct->vectors = pipe_buffer_map
-   (
-      idct->pipe,
-      idct->vertex_bufs.individual.pos.buffer,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &idct->vec_transfer
-   );
-
-   idct->next_empty_block.l_x = ~1;
-   idct->next_empty_block.l_y = ~1;
-   idct->next_empty_block.r_x = ~1;
-   idct->next_empty_block.r_y = ~1;
 }
 
 static void
 xfer_buffers_unmap(struct vl_idct *idct)
 {
-   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, idct->vec_transfer);
-
    idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
    idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
 }
@@ -708,6 +652,11 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
    pipe_resource_reference(&idct->textures.individual.transpose, matrix);
    pipe_resource_reference(&idct->destination, dst);
 
+   idct->max_blocks =
+      align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
+      align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
+      idct->destination->depth0;
+
    if(!init_buffers(idct))
       return false;
 
@@ -716,9 +665,21 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
       return false;
    }
 
+   if(!vl_vb_init(&idct->blocks, idct->max_blocks)) {
+      cleanup_shaders(idct);
+      cleanup_buffers(idct);
+      return false;
+   }
+
+   if(!vl_vb_init(&idct->empty_blocks, idct->max_blocks)) {
+      vl_vb_cleanup(&idct->blocks);
+      cleanup_shaders(idct);
+      cleanup_buffers(idct);
+      return false;
+   }
+
    init_state(idct);
 
-   init_constants(idct);
    xfer_buffers_map(idct);
 
    return true;
@@ -727,6 +688,8 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
 void
 vl_idct_cleanup(struct vl_idct *idct)
 {
+   vl_vb_cleanup(&idct->blocks);
+   vl_vb_cleanup(&idct->empty_blocks);
    cleanup_shaders(idct);
    cleanup_buffers(idct);
 
@@ -735,43 +698,9 @@ vl_idct_cleanup(struct vl_idct *idct)
    pipe_resource_reference(&idct->destination, NULL);
 }
 
-static void
-flush_empty_block(struct vl_idct *idct, unsigned new_x, unsigned new_y)
-{
-   if (idct->next_empty_block.l_x == ~1 ||
-       idct->next_empty_block.l_y == ~1) {
-   
-      idct->next_empty_block.l_x = new_x;
-      idct->next_empty_block.l_y = new_y;
-
-   } else if (idct->next_empty_block.r_x != (new_x - 1) ||
-              idct->next_empty_block.r_y != new_y) {
-
-      struct vertex2f l, r, *v_dst;
-
-      v_dst = idct->vectors + (idct->max_blocks - idct->num_empty_blocks) * 4 - 4;
-
-      l.x = idct->next_empty_block.l_x;
-      l.y = idct->next_empty_block.l_y;
-      r.x = idct->next_empty_block.r_x;
-      r.y = idct->next_empty_block.r_y;
-      v_dst[0] = v_dst[3] = l;
-      v_dst[1] = v_dst[2] = r;
-
-      idct->next_empty_block.l_x = new_x;
-      idct->next_empty_block.l_y = new_y;
-      idct->num_empty_blocks++;
-   }
-
-   idct->next_empty_block.r_x = new_x;
-   idct->next_empty_block.r_y = new_y;
-}
-
 void
 vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 {
-   struct vertex2f v, *v_dst;
-
    unsigned tex_pitch;
    short *texels;
 
@@ -786,32 +715,38 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
       for (i = 0; i < BLOCK_HEIGHT; ++i)
          memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
 
-      /* non empty blocks fills the vector buffer from left to right */
-      v_dst = idct->vectors + idct->num_blocks * 4;
-
-      idct->num_blocks++;
-
-      v.x = x;
-      v.y = y;
-
-      for (i = 0; i < 4; ++i) {
-         v_dst[i] = v;
-      }
-
+      vl_vb_add_block(&idct->blocks, false, x, y);
    } else {
 
-      /* while empty blocks fills the vector buffer from right to left */
-      flush_empty_block(idct, x, y);
+      vl_vb_add_block(&idct->empty_blocks, true, x, y);
    }
 }
 
 void
 vl_idct_flush(struct vl_idct *idct)
 {
-   flush_empty_block(idct, ~1, ~1);
+   struct pipe_transfer *vec_transfer;
+   struct quadf *vectors;
+   unsigned num_blocks, num_empty_blocks;
+
+   assert(idct);
+
+   vectors = pipe_buffer_map
+   (
+      idct->pipe,
+      idct->vertex_bufs.individual.pos.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &vec_transfer
+   );
+
+   num_blocks = vl_vb_upload(&idct->blocks, vectors);
+   num_empty_blocks = vl_vb_upload(&idct->empty_blocks, vectors + num_blocks);
+
+   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, vec_transfer);
+
    xfer_buffers_unmap(idct);
 
-   if(idct->num_blocks > 0) {
+   if(num_blocks > 0) {
 
       /* first stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
@@ -824,7 +759,7 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
 
-      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
 
       /* second stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
@@ -837,10 +772,10 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
 
-      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
    }
 
-   if(idct->num_empty_blocks > 0) {
+   if(num_empty_blocks > 0) {
 
       /* empty block handling */
       idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
@@ -851,12 +786,8 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);
 
-      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS,
-         (idct->max_blocks - idct->num_empty_blocks) * 4,
-         idct->num_empty_blocks * 4);
+      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, num_blocks * 4, num_empty_blocks * 4);
    }
 
-   idct->num_blocks = 0;
-   idct->num_empty_blocks = 0;
    xfer_buffers_map(idct);
 }
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index c26f5cb8c09..94a5c73977f 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -29,6 +29,7 @@
 #define vl_idct_h
 
 #include <pipe/p_state.h>
+#include "vl_vertex_buffers.h"
 
 struct vl_idct
 {
@@ -82,20 +83,11 @@ struct vl_idct
       struct { struct pipe_vertex_buffer quad, pos; } individual;
    } vertex_bufs;
 
-   unsigned num_blocks;
-
-   struct
-   {
-      unsigned l_x, l_y, r_x, r_y;
-   } next_empty_block;
-
-   unsigned num_empty_blocks;
+   struct vl_vertex_buffer blocks;
+   struct vl_vertex_buffer empty_blocks;
 
    struct pipe_transfer *tex_transfer;
    short *texels;
-
-   struct pipe_transfer *vec_transfer;
-   struct vertex2f *vectors;
 };
 
 struct pipe_resource *vl_idct_upload_matrix(struct pipe_context *pipe);
diff --git a/src/gallium/auxiliary/vl/vl_types.h b/src/gallium/auxiliary/vl/vl_types.h
index ce175546894..eeabd43cb23 100644
--- a/src/gallium/auxiliary/vl/vl_types.h
+++ b/src/gallium/auxiliary/vl/vl_types.h
@@ -38,4 +38,9 @@ struct vertex4f
    float x, y, z, w;
 };
 
+struct quadf
+{
+   struct vertex2f bl, tl, tr, br;
+};
+
 #endif /* vl_types_h */
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
new file mode 100644
index 00000000000..6df11db0aef
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -0,0 +1,116 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+#include <pipe/p_context.h>
+#include <pipe/p_screen.h>
+#include <util/u_memory.h>
+#include <util/u_inlines.h>
+#include "vl_vertex_buffers.h"
+#include "vl_types.h"
+
+/* unit quad covering one block; vertex order follows struct quadf (bl, tl, tr, br) */
+static const struct quadf const_quad = {
+   {0.0f, 1.0f}, {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}
+};
+
+/*
+ * Creates a vertex buffer filled with max_blocks copies of const_quad.
+ * The returned .buffer is NULL if creation or mapping failed.
+ */
+struct pipe_vertex_buffer
+vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
+{
+   struct pipe_vertex_buffer quad;
+   struct pipe_transfer *buf_transfer;
+   struct quadf *v;
+   unsigned i;
+
+   assert(pipe);
+   assert(max_blocks);
+
+   /* create buffer: four vertex2f entries per block */
+   quad.stride = sizeof(struct vertex2f);
+   quad.max_index = 4 * max_blocks - 1;
+   quad.buffer_offset = 0;
+   quad.buffer = pipe_buffer_create(pipe->screen, PIPE_BIND_VERTEX_BUFFER,
+                                    sizeof(struct vertex2f) * 4 * max_blocks);
+   if (!quad.buffer)
+      return quad;
+
+   /* and fill it */
+   v = pipe_buffer_map(pipe, quad.buffer,
+                       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+                       &buf_transfer);
+   if (!v) {
+      /* mapping failed: release the buffer so callers see .buffer == NULL */
+      pipe_resource_reference(&quad.buffer, NULL);
+      return quad;
+   }
+
+   /* plain struct assignment, no <string.h> dependency */
+   for (i = 0; i < max_blocks; ++i)
+      v[i] = const_quad;
+
+   pipe_buffer_unmap(pipe, quad.buffer, buf_transfer);
+
+   return quad;
+}
+
+/* Allocates storage (via MALLOC) for max_blocks quads; returns false on failure. */
+bool
+vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks)
+{
+   assert(buffer);
+   buffer->blocks = MALLOC(sizeof(struct quadf) * max_blocks);
+   buffer->num_blocks = 0;
+   return buffer->blocks ? true : false;
+}
+
+/* Copies every queued quad to dst, empties the queue, returns the count copied. */
+unsigned
+vl_vb_upload(struct vl_vertex_buffer *buffer, struct quadf *dst)
+{
+   unsigned count;
+
+   assert(buffer);
+
+   count = buffer->num_blocks;
+   if (count > 0)
+      memcpy(dst, buffer->blocks, count * sizeof(struct quadf));
+
+   buffer->num_blocks = 0;
+   return count;
+}
+
+void
+vl_vb_cleanup(struct vl_vertex_buffer *buffer)
+{
+   assert(buffer);
+   FREE(buffer->blocks);
+   buffer->blocks = NULL; /* storage is released; a repeated cleanup is now harmless */
+}
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
new file mode 100644
index 00000000000..43ddc342d3d
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -0,0 +1,75 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#ifndef vl_vertex_buffers_h
+#define vl_vertex_buffers_h
+
+#include <assert.h>
+#include <pipe/p_state.h>
+#include "vl_types.h"
+
+struct vl_vertex_buffer
+{
+   unsigned num_blocks;   /* number of quads currently queued in blocks */
+   struct quadf *blocks;  /* quad storage allocated by vl_vb_init() */
+};
+
+struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks);
+
+bool vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks);
+
+/*
+ * Queues block (x, y). With allow_merge, a block directly right of the last
+ * queued quad widens that quad instead; returns true only when merged.
+ */
+static inline bool
+vl_vb_add_block(struct vl_vertex_buffer *buffer, bool allow_merge, signed x, signed y)
+{
+   struct quadf *last, *slot;
+
+   assert(buffer);
+   if (allow_merge && buffer->num_blocks > 0) {
+      last = &buffer->blocks[buffer->num_blocks - 1];
+      if (last->tr.x == (x - 1) && last->br.x == (x - 1) &&
+          last->tr.y == y && last->br.y == y) {
+         /* move the right-hand vertices onto the new block */
+         last->tr.x = last->br.x = x;
+         last->tr.y = last->br.y = y;
+         return true;
+      }
+   }
+
+   slot = &buffer->blocks[buffer->num_blocks++];
+   slot->bl.x = slot->tl.x = slot->tr.x = slot->br.x = x;
+   slot->bl.y = slot->tl.y = slot->tr.y = slot->br.y = y;
+   return false;
+}
+
+unsigned vl_vb_upload(struct vl_vertex_buffer *buffer, struct quadf *dst);
+
+void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
+
+#endif
-- 
cgit v1.2.3


From 3bbbb3c54f633ab10e7e2302e8edf3cf1f801858 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 28 Nov 2010 18:49:57 +0100
Subject: move macroblock type handling into its own structure

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 420 +++++++----------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  21 +-
 2 files changed, 151 insertions(+), 290 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 7bb50596de6..b783612b174 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -87,24 +87,21 @@ enum VS_OUTPUT
    VS_O_MV3
 };
 
-enum MACROBLOCK_TYPE
-{
-   MACROBLOCK_TYPE_INTRA,
-   MACROBLOCK_TYPE_FWD_FRAME_PRED,
-   MACROBLOCK_TYPE_FWD_FIELD_PRED,
-   MACROBLOCK_TYPE_BKWD_FRAME_PRED,
-   MACROBLOCK_TYPE_BKWD_FIELD_PRED,
-   MACROBLOCK_TYPE_BI_FRAME_PRED,
-   MACROBLOCK_TYPE_BI_FIELD_PRED,
-
-   NUM_MACROBLOCK_TYPES
-};
-
 /* vertices for a quad covering a macroblock */
 static const struct vertex2f const_quad[4] = {
    {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
 };
 
+static const unsigned const_mbtype_config[VL_NUM_MACROBLOCK_TYPES][2] = {
+   [VL_MACROBLOCK_TYPE_INTRA]           = { 0, 0 },
+   [VL_MACROBLOCK_TYPE_FWD_FRAME_PRED]  = { 1, 1 },
+   [VL_MACROBLOCK_TYPE_FWD_FIELD_PRED]  = { 1, 2 },
+   [VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED] = { 1, 1 },
+   [VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED] = { 1, 2 },
+   [VL_MACROBLOCK_TYPE_BI_FRAME_PRED]   = { 2, 1 },
+   [VL_MACROBLOCK_TYPE_BI_FIELD_PRED]   = { 2, 2 }
+};
+
 static void *
 create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
 {
@@ -299,222 +296,111 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 }
 
 static void *
-create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
+create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
 {
    struct ureg_program *shader;
-   struct ureg_dst field, texel;
-   struct ureg_dst fragment;
-
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
-
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
-   /*
-    * texel = fetch_ycbcr()
-    * fragment = texel * scale + 0.5
-    */
-   field = calc_field(shader);
-   texel = fetch_ycbcr(r, shader, field);
-   ureg_ADD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
-
-   ureg_release_temporary(shader, field);
-   ureg_release_temporary(shader, texel);
-   ureg_END(shader);
-
-   return ureg_create_shader_and_destroy(shader, r->pipe);
-}
-
-static void *
-create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
-{
-   struct ureg_program *shader;
-   struct ureg_src tc;
-   struct ureg_src sampler;
-   struct ureg_dst field, texel, ref;
+   struct ureg_src tc[ref_frames * mv_per_frame], sampler[ref_frames], result;
+   struct ureg_dst field, texel, ref[ref_frames];
    struct ureg_dst fragment;
+   unsigned i, label;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return NULL;
 
-   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0, TGSI_INTERPOLATE_LINEAR);
-   sampler = ureg_DECL_sampler(shader, 3);
-
-   ref = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   /*
-    * texel = fetch_ycbcr()
-    * ref = tex(tc, sampler)
-    * fragment = texel * scale + ref
-    */
-   field = calc_field(shader);
-   texel = fetch_ycbcr(r, shader, field);
-   ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc, sampler);
-   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref));
-
-   ureg_release_temporary(shader, field);
-   ureg_release_temporary(shader, texel);
-   ureg_release_temporary(shader, ref);
-   ureg_END(shader);
-
-   return ureg_create_shader_and_destroy(shader, r->pipe);
-}
-
-static void *
-create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
-{
-   struct ureg_program *shader;
-   struct ureg_src tc[2];
-   struct ureg_src sampler;
-   struct ureg_dst texel, ref, field;
-   struct ureg_dst fragment;
-   unsigned i, label;
-
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
-
-   for (i = 0; i < 2; ++i)
+   for (i = 0; i < ref_frames * mv_per_frame; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
-   sampler = ureg_DECL_sampler(shader, 3);
 
-   ref = ureg_DECL_temporary(shader);
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+   for (i = 0; i < ref_frames; ++i) {
+      sampler[i] = ureg_DECL_sampler(shader, i + 3);
+      ref[i] = ureg_DECL_temporary(shader);
+   }
 
-   /*
-    * texel = fetch_ycbcr()
-    * field = calc_field();
-    * if(field == 1)
-    *    ref = tex(tc[1], sampler)
-    * else
-    *    ref = tex(tc[0], sampler)
-    * fragment = texel * scale + ref
-    */
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
 
-   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
-      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[1], sampler);
-   ureg_ELSE(shader, &label);
-      ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[0], sampler);
-   ureg_ENDIF(shader);
-
-   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref));
-
-   ureg_release_temporary(shader, field);
-   ureg_release_temporary(shader, texel);
-   ureg_release_temporary(shader, ref);
-   ureg_END(shader);
-
-   return ureg_create_shader_and_destroy(shader, r->pipe);
-}
+   switch(ref_frames) {
+   case 0:
+      result = ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X);
+      break;
+
+   case 1:
+      if(mv_per_frame == 1)
+         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
+      else {
+         ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
+            ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
+         ureg_ELSE(shader, &label);
+            ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
+         ureg_ENDIF(shader);
+      }
+      result = ureg_src(ref[0]);
+      break;
 
-static void *
-create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
-{
-   struct ureg_program *shader;
-   struct ureg_src tc[2];
-   struct ureg_src sampler[2];
-   struct ureg_dst field, texel, ref[2];
-   struct ureg_dst fragment;
-   unsigned i;
+   case 2:
+      if(mv_per_frame == 1) {
+         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
+         ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
+      } else {
+         ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
+            ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
+            ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[3], sampler[1]);
+         ureg_ELSE(shader, &label);
+            ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
+            ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[2], sampler[1]);
+         ureg_ENDIF(shader);
+      }
 
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
+      ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
+      result = ureg_src(ref[0]);
+      break;
 
-   for (i = 0; i < 2; ++i)  {
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
-      sampler[i] = ureg_DECL_sampler(shader, i + 3);
+   default:
+      assert(0);
    }
+   ureg_ADD(shader, fragment, ureg_src(texel), result);
 
-   ref[0] = ureg_DECL_temporary(shader);
-   ref[1] = ureg_DECL_temporary(shader);
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
-   /*
-    * texel = fetch_ycbcr()
-    * ref[0..1 = tex(tc[3..4], sampler[3..4])
-    * ref[0] = lerp(ref[0], ref[1], 0.5)
-    * fragment = texel * scale + ref[0]
-    */
-   field = calc_field(shader);
-   texel = fetch_ycbcr(r, shader, field);
-   ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
-   ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
-   ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
-
-   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref[0]));
+   for (i = 0; i < ref_frames; ++i)
+      ureg_release_temporary(shader, ref[i]);
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
-   ureg_release_temporary(shader, ref[0]);
-   ureg_release_temporary(shader, ref[1]);
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static void *
-create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
+static bool
+init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type)
 {
-   struct ureg_program *shader;
-   struct ureg_src tc[4];
-   struct ureg_src sampler[2];
-   struct ureg_dst texel, ref[2], field;
-   struct ureg_dst fragment;
-   unsigned i, label;
-
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
-
-   for (i = 0; i < 4; ++i)
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
-   for (i = 0; i < 2; ++i)
-      sampler[i] = ureg_DECL_sampler(shader, i + 3);
+   unsigned ref_frames, mv_per_frame;
+   struct vl_mc_mbtype_handler *handler;
 
-   texel = ureg_DECL_temporary(shader);
-   ref[0] = ureg_DECL_temporary(shader);
-   ref[1] = ureg_DECL_temporary(shader);
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+   assert(r);
 
-   /*
-    * texel = fetch_ycbcr()
-    * if(field == 1)
-    *    ref[0..1] = tex(tc[1|3], sampler[0..1])
-    * else
-    *    ref[0..1] = tex(tc[0|2], sampler[0..1])
-    * ref[0] = lerp(ref[0], ref[1], 0.5)
-    * fragment = texel * scale + ref[0]
-    */
-   field = calc_field(shader);
-   texel = fetch_ycbcr(r, shader, field);
+   ref_frames = const_mbtype_config[type][0];
+   mv_per_frame = const_mbtype_config[type][1];
 
-   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
-      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
-      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[3], sampler[1]);
-   ureg_ELSE(shader, &label);
-      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
-      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[2], sampler[1]);
-   ureg_ENDIF(shader);
+   handler = &r->mbtype_handlers[type];
 
-   ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
+   handler->vs = create_vert_shader(r, ref_frames, mv_per_frame);
+   handler->fs = create_frag_shader(r, ref_frames, mv_per_frame);
 
-   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref[0]));
+   return handler->vs != NULL && handler->fs != NULL;
+}
 
-   ureg_release_temporary(shader, field);
-   ureg_release_temporary(shader, texel);
-   ureg_release_temporary(shader, ref[0]);
-   ureg_release_temporary(shader, ref[1]);
-   ureg_END(shader);
+static void
+cleanup_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type)
+{
+   assert(r);
 
-   return ureg_create_shader_and_destroy(shader, r->pipe);
+   r->pipe->delete_vs_state(r->pipe, r->mbtype_handlers[type].vs);
+   r->pipe->delete_fs_state(r->pipe, r->mbtype_handlers[type].fs);
 }
 
+
 static bool
 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
@@ -594,54 +480,6 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
       r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
 }
 
-static bool
-init_shaders(struct vl_mpeg12_mc_renderer *r)
-{
-   assert(r);
-
-   r->i_vs = create_vert_shader(r, 0, 0);
-   r->i_fs = create_intra_frag_shader(r);
-   
-   r->p_vs[0] = create_vert_shader(r, 1, 1);
-   r->p_vs[1] = create_vert_shader(r, 1, 2);
-   r->p_fs[0] = create_frame_pred_frag_shader(r);
-   r->p_fs[1] = create_field_pred_frag_shader(r);
-
-   r->b_vs[0] = create_vert_shader(r, 2, 1);
-   r->b_vs[1] = create_vert_shader(r, 2, 2);
-   r->b_fs[0] = create_frame_bi_pred_frag_shader(r);
-   r->b_fs[1] = create_field_bi_pred_frag_shader(r);
-
-   return
-      r->i_vs != NULL &&
-      r->i_fs != NULL &&
-      r->p_vs[0] != NULL &&
-      r->p_vs[1] != NULL &&
-      r->p_fs[0] != NULL &&
-      r->p_fs[1] != NULL &&
-      r->b_vs[0] != NULL &&
-      r->b_vs[1] != NULL &&
-      r->b_fs[0] != NULL &&
-      r->b_fs[1] != NULL;
-}
-
-static void
-cleanup_shaders(struct vl_mpeg12_mc_renderer *r)
-{
-   assert(r);
-
-   r->pipe->delete_vs_state(r->pipe, r->i_vs);
-   r->pipe->delete_fs_state(r->pipe, r->i_fs);
-   r->pipe->delete_vs_state(r->pipe, r->p_vs[0]);
-   r->pipe->delete_vs_state(r->pipe, r->p_vs[1]);
-   r->pipe->delete_fs_state(r->pipe, r->p_fs[0]);
-   r->pipe->delete_fs_state(r->pipe, r->p_fs[1]);
-   r->pipe->delete_vs_state(r->pipe, r->b_vs[0]);
-   r->pipe->delete_vs_state(r->pipe, r->b_vs[1]);
-   r->pipe->delete_fs_state(r->pipe, r->b_fs[0]);
-   r->pipe->delete_fs_state(r->pipe, r->b_fs[1]);
-}
-
 static bool
 init_buffers(struct vl_mpeg12_mc_renderer *r)
 {
@@ -836,23 +674,23 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
    FREE(r->macroblock_buf);
 }
 
-static enum MACROBLOCK_TYPE
+static enum VL_MACROBLOCK_TYPE
 get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
 {
    assert(mb);
 
    switch (mb->mb_type) {
       case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
-         return MACROBLOCK_TYPE_INTRA;
+         return VL_MACROBLOCK_TYPE_INTRA;
       case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
          return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
-            MACROBLOCK_TYPE_FWD_FRAME_PRED : MACROBLOCK_TYPE_FWD_FIELD_PRED;
+            VL_MACROBLOCK_TYPE_FWD_FRAME_PRED : VL_MACROBLOCK_TYPE_FWD_FIELD_PRED;
       case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
          return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
-            MACROBLOCK_TYPE_BKWD_FRAME_PRED : MACROBLOCK_TYPE_BKWD_FIELD_PRED;
+            VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED : VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED;
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
          return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
-            MACROBLOCK_TYPE_BI_FRAME_PRED : MACROBLOCK_TYPE_BI_FIELD_PRED;
+            VL_MACROBLOCK_TYPE_BI_FRAME_PRED : VL_MACROBLOCK_TYPE_BI_FIELD_PRED;
       default:
          assert(0);
    }
@@ -994,7 +832,7 @@ static void
 gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
                       unsigned *num_macroblocks)
 {
-   unsigned offset[NUM_MACROBLOCK_TYPES];
+   unsigned offset[VL_NUM_MACROBLOCK_TYPES];
    struct vert_stream_0 *ycbcr_vb;
    struct vertex2f *ref_vb[2];
    struct pipe_transfer *buf_transfer[3];
@@ -1004,13 +842,13 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
    assert(num_macroblocks);
 
    for (i = 0; i < r->num_macroblocks; ++i) {
-      enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
+      enum VL_MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
       ++num_macroblocks[mb_type];
    }
 
    offset[0] = 0;
 
-   for (i = 1; i < NUM_MACROBLOCK_TYPES; ++i)
+   for (i = 1; i < VL_NUM_MACROBLOCK_TYPES; ++i)
       offset[i] = offset[i - 1] + num_macroblocks[i - 1];
 
    ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
@@ -1031,7 +869,7 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
       );
 
    for (i = 0; i < r->num_macroblocks; ++i) {
-      enum MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
+      enum VL_MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
 
       gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
                            ycbcr_vb, ref_vb);
@@ -1074,7 +912,7 @@ static struct pipe_sampler_view
 static void
 flush(struct vl_mpeg12_mc_renderer *r)
 {
-   unsigned num_macroblocks[NUM_MACROBLOCK_TYPES] = { 0 };
+   unsigned num_macroblocks[VL_NUM_MACROBLOCK_TYPES] = { 0 };
    unsigned vb_start = 0;
 
    assert(r);
@@ -1089,80 +927,80 @@ flush(struct vl_mpeg12_mc_renderer *r)
    r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
    r->pipe->set_viewport_state(r->pipe, &r->viewport);
 
-   if (num_macroblocks[MACROBLOCK_TYPE_INTRA] > 0) {
+   if (num_macroblocks[VL_MACROBLOCK_TYPE_INTRA] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
       r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->i_vs);
-      r->pipe->bind_fs_state(r->pipe, r->i_fs);
+      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_INTRA].vs);
+      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_INTRA].fs);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_INTRA] * 4);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_INTRA] * 4;
+                       num_macroblocks[VL_MACROBLOCK_TYPE_INTRA] * 4);
+      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_INTRA] * 4;
    }
 
-   if (num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
+   if (num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->past->texture;
       r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
       r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
-      r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
+      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_FWD_FRAME_PRED].vs);
+      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_FWD_FRAME_PRED].fs);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 4);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FRAME_PRED] * 4;
+                       num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FRAME_PRED] * 4);
+      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FRAME_PRED] * 4;
    }
 
-   if (num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
+   if (num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->past->texture;
       r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
       r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
-      r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
+      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_FWD_FIELD_PRED].vs);
+      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_FWD_FIELD_PRED].fs);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 4);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_FWD_FIELD_PRED] * 4;
+                       num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FIELD_PRED] * 4);
+      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FIELD_PRED] * 4;
    }
 
-   if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
+   if (num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->future->texture;
       r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
       r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->p_vs[0]);
-      r->pipe->bind_fs_state(r->pipe, r->p_fs[0]);
+      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED].vs);
+      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED].fs);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 4);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 4;
+                       num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 4);
+      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 4;
    }
 
-   if (num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
+   if (num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
       r->textures.individual.ref[0] = r->future->texture;
       r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
       r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->p_vs[1]);
-      r->pipe->bind_fs_state(r->pipe, r->p_fs[1]);
+      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED].vs);
+      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED].fs);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 4);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 4;
+                       num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 4);
+      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 4;
    }
 
-   if (num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
+   if (num_macroblocks[VL_MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 4, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
       r->textures.individual.ref[0] = r->past->texture;
@@ -1171,15 +1009,15 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
       r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->b_vs[0]);
-      r->pipe->bind_fs_state(r->pipe, r->b_fs[0]);
+      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BI_FRAME_PRED].vs);
+      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BI_FRAME_PRED].fs);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 4);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FRAME_PRED] * 4;
+                       num_macroblocks[VL_MACROBLOCK_TYPE_BI_FRAME_PRED] * 4);
+      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_BI_FRAME_PRED] * 4;
    }
 
-   if (num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
+   if (num_macroblocks[VL_MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
       r->pipe->set_vertex_buffers(r->pipe, 4, r->vertex_bufs.all);
       r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
       r->textures.individual.ref[0] = r->past->texture;
@@ -1188,12 +1026,12 @@ flush(struct vl_mpeg12_mc_renderer *r)
       r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
       r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
       r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->b_vs[1]);
-      r->pipe->bind_fs_state(r->pipe, r->b_fs[1]);
+      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BI_FIELD_PRED].vs);
+      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BI_FIELD_PRED].fs);
 
       util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 4);
-      vb_start += num_macroblocks[MACROBLOCK_TYPE_BI_FIELD_PRED] * 4;
+                       num_macroblocks[VL_MACROBLOCK_TYPE_BI_FIELD_PRED] * 4);
+      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_BI_FIELD_PRED] * 4;
    }
 
    r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
@@ -1297,6 +1135,7 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            bool pot_buffers)
 {
    struct pipe_resource *idct_matrix;
+   unsigned i;
 
    assert(renderer);
    assert(pipe);
@@ -1323,8 +1162,9 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (!init_pipe_state(renderer))
       goto error_pipe_state;
 
-   if (!init_shaders(renderer))
-      goto error_shaders;
+   for(i = 0; i<VL_NUM_MACROBLOCK_TYPES; ++i)
+      if (!init_mbtype_handler(renderer, i))
+         goto error_mbtypes;
 
    if (!init_buffers(renderer))
       goto error_buffers;
@@ -1361,9 +1201,10 @@ error_idct_matrix:
    cleanup_buffers(renderer);
 
 error_buffers:
-   cleanup_shaders(renderer);
+   for(i = 0; i<VL_NUM_MACROBLOCK_TYPES; ++i)
+      cleanup_mbtype_handler(renderer, i);
 
-error_shaders:
+error_mbtypes:
    cleanup_pipe_state(renderer);
 
 error_pipe_state:
@@ -1374,6 +1215,8 @@ error_pipe_state:
 void
 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 {
+   unsigned i;
+
    assert(renderer);
 
    vl_idct_cleanup(&renderer->idct_y);
@@ -1382,7 +1225,8 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 
    util_delete_keymap(renderer->texview_map, renderer->pipe);
    cleanup_pipe_state(renderer);
-   cleanup_shaders(renderer);
+   for(i = 0; i<VL_NUM_MACROBLOCK_TYPES; ++i)
+      cleanup_mbtype_handler(renderer, i);
    cleanup_buffers(renderer);
 
    pipe_surface_reference(&renderer->surface, NULL);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index ed48b5b6b45..69fa4bb7f23 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -45,6 +45,24 @@ enum VL_MPEG12_MC_RENDERER_BUFFER_MODE
    VL_MPEG12_MC_RENDERER_BUFFER_PICTURE /* Larger batches, more memory */
 };
 
+enum VL_MACROBLOCK_TYPE
+{
+   VL_MACROBLOCK_TYPE_INTRA,
+   VL_MACROBLOCK_TYPE_FWD_FRAME_PRED,
+   VL_MACROBLOCK_TYPE_FWD_FIELD_PRED,
+   VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED,
+   VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED,
+   VL_MACROBLOCK_TYPE_BI_FRAME_PRED,
+   VL_MACROBLOCK_TYPE_BI_FIELD_PRED,
+
+   VL_NUM_MACROBLOCK_TYPES
+};
+
+struct vl_mc_mbtype_handler
+{
+   void *vs, *fs;
+};
+
 struct vl_mpeg12_mc_renderer
 {
    struct pipe_context *pipe;
@@ -79,8 +97,7 @@ struct vl_mpeg12_mc_renderer
       struct { struct pipe_sampler_view *y, *cb, *cr, *ref[2]; } individual;
    } sampler_views;
 
-   void *i_vs, *p_vs[2], *b_vs[2];
-   void *i_fs, *p_fs[2], *b_fs[2];
+   struct vl_mc_mbtype_handler mbtype_handlers[VL_NUM_MACROBLOCK_TYPES];
 
    union
    {
-- 
cgit v1.2.3


From 3e6a5077ca5968a3bf76e6ab6ba8f33487895066 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 28 Nov 2010 20:08:40 +0100
Subject: move vertex element states into mb type handlers

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 198 ++++++++---------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   7 +-
 2 files changed, 72 insertions(+), 133 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index b783612b174..dbc900d2614 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -373,7 +373,8 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 }
 
 static bool
-init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type)
+init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type,
+                    struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS])
 {
    unsigned ref_frames, mv_per_frame;
    struct vl_mc_mbtype_handler *handler;
@@ -388,16 +389,26 @@ init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE typ
    handler->vs = create_vert_shader(r, ref_frames, mv_per_frame);
    handler->fs = create_frag_shader(r, ref_frames, mv_per_frame);
 
-   return handler->vs != NULL && handler->fs != NULL;
+   handler->vertex_elems_state = r->pipe->create_vertex_elements_state(
+      r->pipe, 3 + ref_frames * mv_per_frame, vertex_elems);
+
+   return handler->vs != NULL &&
+          handler->fs != NULL &&
+          handler->vertex_elems_state != NULL;
 }
 
 static void
 cleanup_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type)
 {
+   struct vl_mc_mbtype_handler *handler;
+
    assert(r);
 
-   r->pipe->delete_vs_state(r->pipe, r->mbtype_handlers[type].vs);
-   r->pipe->delete_fs_state(r->pipe, r->mbtype_handlers[type].fs);
+   handler = &r->mbtype_handlers[type];
+
+   r->pipe->delete_vs_state(r->pipe, handler->vs);
+   r->pipe->delete_fs_state(r->pipe, handler->fs);
+   r->pipe->delete_vertex_elements_state(r->pipe, handler->vertex_elems_state);
 }
 
 
@@ -620,9 +631,8 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[VS_I_MV3].vertex_buffer_index = 3;
    vertex_elems[VS_I_MV3].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   r->vertex_elems_state.individual.i = r->pipe->create_vertex_elements_state(r->pipe, 3, vertex_elems);
-   r->vertex_elems_state.individual.p = r->pipe->create_vertex_elements_state(r->pipe, 5, vertex_elems);
-   r->vertex_elems_state.individual.b = r->pipe->create_vertex_elements_state(r->pipe, 7, vertex_elems);
+   for(i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i)
+      init_mbtype_handler(r, i, vertex_elems);
 
    r->vs_const_buf = pipe_buffer_create
    (
@@ -666,11 +676,13 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
 
    for (i = 0; i < 3; ++i) {
       pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
-      r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state.all[i]);
       pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
       pipe_resource_reference(&r->textures.all[i], NULL);
    }
 
+   for(i = 0; i<VL_NUM_MACROBLOCK_TYPES; ++i)
+      cleanup_mbtype_handler(r, i);
+
    FREE(r->macroblock_buf);
 }
 
@@ -909,11 +921,58 @@ static struct pipe_sampler_view
    return sampler_view;
 }
 
+static unsigned
+flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type,
+                     unsigned vb_start, unsigned num_macroblocks)
+{
+   unsigned ref_frames, mv_per_frame;
+   struct vl_mc_mbtype_handler *handler;
+
+   assert(r);
+
+   ref_frames = const_mbtype_config[type][0];
+   mv_per_frame = const_mbtype_config[type][1];
+
+   handler = &r->mbtype_handlers[type];
+
+   r->pipe->set_vertex_buffers(r->pipe, 2 + ref_frames, r->vertex_bufs.all);
+   r->pipe->bind_vertex_elements_state(r->pipe, handler->vertex_elems_state);
+
+   if(ref_frames == 2) {
+
+      r->textures.individual.ref[0] = r->past->texture;
+      r->textures.individual.ref[1] = r->future->texture;
+      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
+      r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
+
+   } else if(ref_frames == 1) {
+
+      struct pipe_surface *ref;
+
+      if(type == VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED ||
+         type == VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED)
+         ref = r->future;
+      else
+         ref = r->past;
+
+      r->textures.individual.ref[0] = ref->texture;
+      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, ref);
+   }
+
+   r->pipe->set_fragment_sampler_views(r->pipe, 3 + ref_frames, r->sampler_views.all);
+   r->pipe->bind_fragment_sampler_states(r->pipe, 3 + ref_frames, r->samplers.all);
+   r->pipe->bind_vs_state(r->pipe, handler->vs);
+   r->pipe->bind_fs_state(r->pipe, handler->fs);
+
+   util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start, num_macroblocks * 4);
+   return num_macroblocks * 4;
+}
+
 static void
 flush(struct vl_mpeg12_mc_renderer *r)
 {
    unsigned num_macroblocks[VL_NUM_MACROBLOCK_TYPES] = { 0 };
-   unsigned vb_start = 0;
+   unsigned vb_start = 0, i;
 
    assert(r);
    assert(r->num_macroblocks == r->macroblocks_per_batch);
@@ -927,111 +986,9 @@ flush(struct vl_mpeg12_mc_renderer *r)
    r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
    r->pipe->set_viewport_state(r->pipe, &r->viewport);
 
-   if (num_macroblocks[VL_MACROBLOCK_TYPE_INTRA] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 2, r->vertex_bufs.all);
-      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.i);
-      r->pipe->set_fragment_sampler_views(r->pipe, 3, r->sampler_views.all);
-      r->pipe->bind_fragment_sampler_states(r->pipe, 3, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_INTRA].vs);
-      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_INTRA].fs);
-
-      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[VL_MACROBLOCK_TYPE_INTRA] * 4);
-      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_INTRA] * 4;
-   }
-
-   if (num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FRAME_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
-      r->textures.individual.ref[0] = r->past->texture;
-      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
-      r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
-      r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_FWD_FRAME_PRED].vs);
-      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_FWD_FRAME_PRED].fs);
-
-      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FRAME_PRED] * 4);
-      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FRAME_PRED] * 4;
-   }
-
-   if (num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FIELD_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
-      r->textures.individual.ref[0] = r->past->texture;
-      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
-      r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
-      r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_FWD_FIELD_PRED].vs);
-      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_FWD_FIELD_PRED].fs);
-
-      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FIELD_PRED] * 4);
-      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_FWD_FIELD_PRED] * 4;
-   }
-
-   if (num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
-      r->textures.individual.ref[0] = r->future->texture;
-      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
-      r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
-      r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED].vs);
-      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED].fs);
-
-      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 4);
-      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED] * 4;
-   }
-
-   if (num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 3, r->vertex_bufs.all);
-      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.p);
-      r->textures.individual.ref[0] = r->future->texture;
-      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->future);
-      r->pipe->set_fragment_sampler_views(r->pipe, 4, r->sampler_views.all);
-      r->pipe->bind_fragment_sampler_states(r->pipe, 4, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED].vs);
-      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED].fs);
-
-      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 4);
-      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED] * 4;
-   }
-
-   if (num_macroblocks[VL_MACROBLOCK_TYPE_BI_FRAME_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 4, r->vertex_bufs.all);
-      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
-      r->textures.individual.ref[0] = r->past->texture;
-      r->textures.individual.ref[1] = r->future->texture;
-      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
-      r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
-      r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
-      r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BI_FRAME_PRED].vs);
-      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BI_FRAME_PRED].fs);
-
-      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[VL_MACROBLOCK_TYPE_BI_FRAME_PRED] * 4);
-      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_BI_FRAME_PRED] * 4;
-   }
-
-   if (num_macroblocks[VL_MACROBLOCK_TYPE_BI_FIELD_PRED] > 0) {
-      r->pipe->set_vertex_buffers(r->pipe, 4, r->vertex_bufs.all);
-      r->pipe->bind_vertex_elements_state(r->pipe, r->vertex_elems_state.individual.b);
-      r->textures.individual.ref[0] = r->past->texture;
-      r->textures.individual.ref[1] = r->future->texture;
-      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
-      r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
-      r->pipe->set_fragment_sampler_views(r->pipe, 5, r->sampler_views.all);
-      r->pipe->bind_fragment_sampler_states(r->pipe, 5, r->samplers.all);
-      r->pipe->bind_vs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BI_FIELD_PRED].vs);
-      r->pipe->bind_fs_state(r->pipe, r->mbtype_handlers[VL_MACROBLOCK_TYPE_BI_FIELD_PRED].fs);
-
-      util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start,
-                       num_macroblocks[VL_MACROBLOCK_TYPE_BI_FIELD_PRED] * 4);
-      vb_start += num_macroblocks[VL_MACROBLOCK_TYPE_BI_FIELD_PRED] * 4;
+   for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) {
+      if (num_macroblocks[i] > 0)
+         vb_start += flush_mbtype_handler(r, i, vb_start, num_macroblocks[i]);
    }
 
    r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
@@ -1135,7 +1092,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            bool pot_buffers)
 {
    struct pipe_resource *idct_matrix;
-   unsigned i;
 
    assert(renderer);
    assert(pipe);
@@ -1162,10 +1118,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (!init_pipe_state(renderer))
       goto error_pipe_state;
 
-   for(i = 0; i<VL_NUM_MACROBLOCK_TYPES; ++i)
-      if (!init_mbtype_handler(renderer, i))
-         goto error_mbtypes;
-
    if (!init_buffers(renderer))
       goto error_buffers;
 
@@ -1201,10 +1153,6 @@ error_idct_matrix:
    cleanup_buffers(renderer);
 
 error_buffers:
-   for(i = 0; i<VL_NUM_MACROBLOCK_TYPES; ++i)
-      cleanup_mbtype_handler(renderer, i);
-
-error_mbtypes:
    cleanup_pipe_state(renderer);
 
 error_pipe_state:
@@ -1215,8 +1163,6 @@ error_pipe_state:
 void
 vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 {
-   unsigned i;
-
    assert(renderer);
 
    vl_idct_cleanup(&renderer->idct_y);
@@ -1225,8 +1171,6 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 
    util_delete_keymap(renderer->texview_map, renderer->pipe);
    cleanup_pipe_state(renderer);
-   for(i = 0; i<VL_NUM_MACROBLOCK_TYPES; ++i)
-      cleanup_mbtype_handler(renderer, i);
    cleanup_buffers(renderer);
 
    pipe_surface_reference(&renderer->surface, NULL);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 69fa4bb7f23..43dbee6f220 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -60,6 +60,7 @@ enum VL_MACROBLOCK_TYPE
 
 struct vl_mc_mbtype_handler
 {
+   void *vertex_elems_state;
    void *vs, *fs;
 };
 
@@ -79,12 +80,6 @@ struct vl_mpeg12_mc_renderer
 
    struct vl_idct idct_y, idct_cb, idct_cr;
 
-   union
-   {
-      void *all[3];
-      struct { void *i, *p, *b; } individual;
-   } vertex_elems_state;
-
    union
    {
       void *all[5];
-- 
cgit v1.2.3


From 838d1092077368d45674f5f5637186da5c69ca15 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 28 Nov 2010 20:20:47 +0100
Subject: use vl_vb_upload_quads also for mc

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 38 +-----------------------
 1 file changed, 1 insertion(+), 37 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index dbc900d2614..ab6f8b092fe 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -87,11 +87,6 @@ enum VS_OUTPUT
    VS_O_MV3
 };
 
-/* vertices for a quad covering a macroblock */
-static const struct vertex2f const_quad[4] = {
-   {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
-};
-
 static const unsigned const_mbtype_config[VL_NUM_MACROBLOCK_TYPES][2] = {
    [VL_MACROBLOCK_TYPE_INTRA]           = { 0, 0 },
    [VL_MACROBLOCK_TYPE_FWD_FRAME_PRED]  = { 1, 1 },
@@ -553,15 +548,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
    }
 
-   r->vertex_bufs.individual.rect.stride = sizeof(struct vertex2f);
-   r->vertex_bufs.individual.rect.max_index = 4 * r->macroblocks_per_batch - 1;
-   r->vertex_bufs.individual.rect.buffer_offset = 0;
-   r->vertex_bufs.individual.rect.buffer = pipe_buffer_create
-   (
-      r->pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch
-   );
+   r->vertex_bufs.individual.quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
 
    r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vert_stream_0);
    r->vertex_bufs.individual.ycbcr.max_index = 4 * r->macroblocks_per_batch - 1;
@@ -644,27 +631,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    return true;
 }
 
-static void
-init_const_buffers(struct vl_mpeg12_mc_renderer *r)
-{
-   struct pipe_transfer *buf_transfer;
-   struct vertex2f *rect;
-   unsigned i;
-
-   rect = pipe_buffer_map
-   (
-      r->pipe,
-      r->vertex_bufs.individual.rect.buffer,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer
-   );
-
-   for ( i = 0; i < r->macroblocks_per_batch; ++i)
-     memcpy(rect + i * 4, &const_quad, sizeof(const_quad));
-
-   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.rect.buffer, buf_transfer);
-}
-
 static void
 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
 {
@@ -1121,8 +1087,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (!init_buffers(renderer))
       goto error_buffers;
 
-   init_const_buffers(renderer);
-
    renderer->surface = NULL;
    renderer->past = NULL;
    renderer->future = NULL;
-- 
cgit v1.2.3


From c7068d79a06c374dd6e732a53436dd9bcdba7e0e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 28 Nov 2010 21:09:53 +0100
Subject: give each vertex element its own buffer

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 157 ++++++++++++-----------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   6 +-
 2 files changed, 85 insertions(+), 78 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index ab6f8b092fe..75d0f6ef57c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -48,18 +48,6 @@ struct vertex_shader_consts
    struct vertex4f norm;
 };
 
-struct fragment_shader_consts
-{
-   struct vertex4f multiplier;
-   struct vertex4f div;
-};
-
-struct vert_stream_0
-{
-   struct vertex2f pos;
-   float interlaced;
-};
-
 enum VS_INPUT
 {
    VS_I_RECT,
@@ -550,7 +538,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
 
    r->vertex_bufs.individual.quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
 
-   r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vert_stream_0);
+   r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f);
    r->vertex_bufs.individual.ycbcr.max_index = 4 * r->macroblocks_per_batch - 1;
    r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
    /* XXX: Create with usage DYNAMIC or STREAM */
@@ -558,19 +546,30 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    (
       r->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vert_stream_0) * 4 * r->macroblocks_per_batch
+      sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch
    );
 
-   for (i = 0; i < 2; ++i) {
-      r->vertex_bufs.individual.ref[i].stride = sizeof(struct vertex2f) * 2;
-      r->vertex_bufs.individual.ref[i].max_index = 4 * r->macroblocks_per_batch - 1;
-      r->vertex_bufs.individual.ref[i].buffer_offset = 0;
+   r->vertex_bufs.individual.interlaced.stride = sizeof(float);
+   r->vertex_bufs.individual.interlaced.max_index = 4 * r->macroblocks_per_batch - 1;
+   r->vertex_bufs.individual.interlaced.buffer_offset = 0;
+   /* XXX: Create with usage DYNAMIC or STREAM */
+   r->vertex_bufs.individual.interlaced.buffer = pipe_buffer_create
+   (
+      r->pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      sizeof(float) * 4 * r->macroblocks_per_batch
+   );
+
+   for (i = 0; i < 4; ++i) {
+      r->vertex_bufs.individual.mv[i].stride = sizeof(struct vertex2f);
+      r->vertex_bufs.individual.mv[i].max_index = 4 * r->macroblocks_per_batch - 1;
+      r->vertex_bufs.individual.mv[i].buffer_offset = 0;
       /* XXX: Create with usage DYNAMIC or STREAM */
-      r->vertex_bufs.individual.ref[i].buffer = pipe_buffer_create
+      r->vertex_bufs.individual.mv[i].buffer = pipe_buffer_create
       (
          r->pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
-         sizeof(struct vertex2f) * 2 * 4 * r->macroblocks_per_batch
+         sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch
       );
    }
 
@@ -589,33 +588,33 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* progressive=1.0f interlaced=0.0f */
-   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_INTERLACED].src_offset = 0;
    vertex_elems[VS_I_INTERLACED].instance_divisor = 0;
-   vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 1;
+   vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 2;
    vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
 
    /* First ref surface top field texcoord element */
    vertex_elems[VS_I_MV0].src_offset = 0;
    vertex_elems[VS_I_MV0].instance_divisor = 0;
-   vertex_elems[VS_I_MV0].vertex_buffer_index = 2;
+   vertex_elems[VS_I_MV0].vertex_buffer_index = 3;
    vertex_elems[VS_I_MV0].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* First ref surface bottom field texcoord element */
-   vertex_elems[VS_I_MV1].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_MV1].src_offset = 0;
    vertex_elems[VS_I_MV1].instance_divisor = 0;
-   vertex_elems[VS_I_MV1].vertex_buffer_index = 2;
+   vertex_elems[VS_I_MV1].vertex_buffer_index = 4;
    vertex_elems[VS_I_MV1].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Second ref surface top field texcoord element */
    vertex_elems[VS_I_MV2].src_offset = 0;
    vertex_elems[VS_I_MV2].instance_divisor = 0;
-   vertex_elems[VS_I_MV2].vertex_buffer_index = 3;
+   vertex_elems[VS_I_MV2].vertex_buffer_index = 5;
    vertex_elems[VS_I_MV2].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Second ref surface bottom field texcoord element */
-   vertex_elems[VS_I_MV3].src_offset = sizeof(struct vertex2f);
+   vertex_elems[VS_I_MV3].src_offset = 0;
    vertex_elems[VS_I_MV3].instance_divisor = 0;
-   vertex_elems[VS_I_MV3].vertex_buffer_index = 3;
+   vertex_elems[VS_I_MV3].vertex_buffer_index = 6;
    vertex_elems[VS_I_MV3].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    for(i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i)
@@ -678,9 +677,9 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
 }
 
 void
-gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
+gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r, 
                      struct pipe_mpeg12_macroblock *mb, unsigned pos,
-                     struct vert_stream_0 *ycbcr_vb, struct vertex2f **ref_vb)
+                     struct vertex2f *ycbcr_vb, float *interlaced_vb, struct vertex2f **mv_vb)
 {
    struct vertex2f mo_vec[2];
 
@@ -689,7 +688,6 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
    assert(r);
    assert(mb);
    assert(ycbcr_vb);
-   assert(pos < r->macroblocks_per_batch);
 
    mo_vec[1].x = 0;
    mo_vec[1].y = 0;
@@ -697,19 +695,20 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
    switch (mb->mb_type) {
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
       {
-         struct vertex2f *vb;
+         struct vertex2f *vb[2];
 
-         assert(ref_vb && ref_vb[1]);
+         assert(mv_vb && mv_vb[2] && mv_vb[3]);
 
-         vb = ref_vb[1] + pos * 2 * 4;
+         vb[0] = mv_vb[2] + pos;
+         vb[1] = mv_vb[3] + pos;
 
          mo_vec[0].x = mb->pmv[0][1][0];
          mo_vec[0].y = mb->pmv[0][1][1];
 
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            for (i = 0; i < 4 * 2; i += 2) {
-               vb[i].x = mo_vec[0].x;
-               vb[i].y = mo_vec[0].y;
+            for (i = 0; i < 4; ++i) {
+               vb[0][i].x = mo_vec[0].x;
+               vb[0][i].y = mo_vec[0].y;
             }
          }
          else {
@@ -721,11 +720,11 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
             if(mb->mvfs[0][1]) mo_vec[0].y += 2;
             if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
 
-            for (i = 0; i < 4 * 2; i += 2) {
-               vb[i].x = mo_vec[0].x;
-               vb[i].y = mo_vec[0].y;
-               vb[i + 1].x = mo_vec[1].x;
-               vb[i + 1].y = mo_vec[1].y;
+            for (i = 0; i < 4; ++i) {
+               vb[0][i].x = mo_vec[0].x;
+               vb[0][i].y = mo_vec[0].y;
+               vb[1][i].x = mo_vec[1].x;
+               vb[1][i].y = mo_vec[1].y;
             }
          }
 
@@ -734,11 +733,12 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
       case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
       case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
       {
-         struct vertex2f *vb;
+         struct vertex2f *vb[2];
 
-         assert(ref_vb && ref_vb[0]);
+         assert(mv_vb && mv_vb[0] && mv_vb[1]);
 
-         vb = ref_vb[0] + pos * 2 * 4;
+         vb[0] = mv_vb[0] + pos;
+         vb[1] = mv_vb[1] + pos;
 
          if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
             mo_vec[0].x = mb->pmv[0][1][0];
@@ -770,17 +770,17 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
          }
 
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            for (i = 0; i < 4 * 2; i += 2) {
-               vb[i].x = mo_vec[0].x;
-               vb[i].y = mo_vec[0].y;
+            for (i = 0; i < 4; ++i) {
+               vb[0][i].x = mo_vec[0].x;
+               vb[0][i].y = mo_vec[0].y;
             }
          }
          else {
-            for (i = 0; i < 4 * 2; i += 2) {
-               vb[i].x = mo_vec[0].x;
-               vb[i].y = mo_vec[0].y;
-               vb[i + 1].x = mo_vec[1].x;
-               vb[i + 1].y = mo_vec[1].y;
+            for (i = 0; i < 4; ++i) {
+               vb[0][i].x = mo_vec[0].x;
+               vb[0][i].y = mo_vec[0].y;
+               vb[1][i].x = mo_vec[1].x;
+               vb[1][i].y = mo_vec[1].y;
             }
          }
 
@@ -788,16 +788,11 @@ gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r,
       }
       case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
       {
-         struct vert_stream_0 *vb = ycbcr_vb + pos * 4;
-         struct vert_stream_0 v;
-
-         v.pos.x = mb->mbx;
-         v.pos.y = mb->mby;
-
-         v.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
-
-         for ( i = 0; i < 4; ++i )
-            memcpy(vb + i, &v, sizeof(v));
+         for ( i = 0; i < 4; ++i ) {
+            ycbcr_vb[i + pos].x = mb->mbx;
+            ycbcr_vb[i + pos].y = mb->mby;
+            interlaced_vb[i + pos] = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
+         }
 
          break;
       }
@@ -811,9 +806,10 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
                       unsigned *num_macroblocks)
 {
    unsigned offset[VL_NUM_MACROBLOCK_TYPES];
-   struct vert_stream_0 *ycbcr_vb;
-   struct vertex2f *ref_vb[2];
-   struct pipe_transfer *buf_transfer[3];
+   struct vertex2f *ycbcr_vb;
+   float *interlaced_vb;
+   struct vertex2f *mv_vb[4];
+   struct pipe_transfer *buf_transfer[7];
    unsigned i;
 
    assert(r);
@@ -829,7 +825,7 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
    for (i = 1; i < VL_NUM_MACROBLOCK_TYPES; ++i)
       offset[i] = offset[i - 1] + num_macroblocks[i - 1];
 
-   ycbcr_vb = (struct vert_stream_0 *)pipe_buffer_map
+   ycbcr_vb = (struct vertex2f *)pipe_buffer_map
    (
       r->pipe,
       r->vertex_bufs.individual.ycbcr.buffer,
@@ -837,27 +833,36 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
       &buf_transfer[0]
    );
 
-   for (i = 0; i < 2; ++i)
-      ref_vb[i] = (struct vertex2f *)pipe_buffer_map
+   interlaced_vb = (float *)pipe_buffer_map
+   (
+      r->pipe,
+      r->vertex_bufs.individual.interlaced.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer[1]
+   );
+
+   for (i = 0; i < 4; ++i)
+      mv_vb[i] = (struct vertex2f *)pipe_buffer_map
       (
          r->pipe,
-         r->vertex_bufs.individual.ref[i].buffer,
+         r->vertex_bufs.individual.mv[i].buffer,
          PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-         &buf_transfer[i + 1]
+         &buf_transfer[i + 2]
       );
 
    for (i = 0; i < r->num_macroblocks; ++i) {
       enum VL_MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
 
-      gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type],
-                           ycbcr_vb, ref_vb);
+      gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type] * 4,
+                           ycbcr_vb, interlaced_vb, mv_vb);
 
       ++offset[mb_type];
    }
 
    pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
-   for (i = 0; i < 2; ++i)
-      pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ref[i].buffer, buf_transfer[i + 1]);
+   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.interlaced.buffer, buf_transfer[1]);
+   for (i = 0; i < 4; ++i)
+      pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.mv[i].buffer, buf_transfer[i + 2]);
 }
 
 static struct pipe_sampler_view
@@ -901,7 +906,7 @@ flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE ty
 
    handler = &r->mbtype_handlers[type];
 
-   r->pipe->set_vertex_buffers(r->pipe, 2 + ref_frames, r->vertex_bufs.all);
+   r->pipe->set_vertex_buffers(r->pipe, 3 + ref_frames * mv_per_frame, r->vertex_bufs.all);
    r->pipe->bind_vertex_elements_state(r->pipe, handler->vertex_elems_state);
 
    if(ref_frames == 2) {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 43dbee6f220..fd310e6ac3e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -102,8 +102,10 @@ struct vl_mpeg12_mc_renderer
 
    union
    {
-      struct pipe_vertex_buffer all[4];
-      struct { struct pipe_vertex_buffer rect, ycbcr, ref[2]; } individual;
+      struct pipe_vertex_buffer all[7];
+      struct {
+         struct pipe_vertex_buffer quad, ycbcr, interlaced, mv[4];
+      } individual;
    } vertex_bufs;
 
    struct pipe_surface *surface, *past, *future;
-- 
cgit v1.2.3


From d2888c5f2f3ed8727ea973e839b54435eac31e89 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 29 Nov 2010 00:09:22 +0100
Subject: use vertex buffer also for mc code

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 316 +++++++++++------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   8 +-
 2 files changed, 162 insertions(+), 162 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 75d0f6ef57c..9224677a343 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -361,6 +361,7 @@ init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE typ
 {
    unsigned ref_frames, mv_per_frame;
    struct vl_mc_mbtype_handler *handler;
+   unsigned i;
 
    assert(r);
 
@@ -372,26 +373,54 @@ init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE typ
    handler->vs = create_vert_shader(r, ref_frames, mv_per_frame);
    handler->fs = create_frag_shader(r, ref_frames, mv_per_frame);
 
+   if (handler->vs == NULL || handler->fs == NULL)
+      return false;
+
    handler->vertex_elems_state = r->pipe->create_vertex_elements_state(
       r->pipe, 3 + ref_frames * mv_per_frame, vertex_elems);
 
-   return handler->vs != NULL &&
-          handler->fs != NULL &&
-          handler->vertex_elems_state != NULL;
+   if (handler->vertex_elems_state == NULL)
+      return false;
+
+   if (!vl_vb_init(&handler->pos, r->macroblocks_per_batch))
+      return false;
+
+   handler->interlaced = MALLOC(sizeof(float) * r->macroblocks_per_batch * 4);
+   if (handler->interlaced == NULL)
+      return false;
+
+   for (i = 0; i < 4 /*TODO: ref_frames * mv_per_frame */; ++i) {
+      handler->mv[i] = MALLOC(sizeof(struct vertex2f) * r->macroblocks_per_batch * 4);
+      if (handler->mv[i] == NULL)
+         return false;
+   }
+
+   return true;
 }
 
 static void
 cleanup_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type)
 {
+   unsigned ref_frames, mv_per_frame;
    struct vl_mc_mbtype_handler *handler;
+   unsigned i;
 
    assert(r);
 
+   ref_frames = const_mbtype_config[type][0];
+   mv_per_frame = const_mbtype_config[type][1];
+
    handler = &r->mbtype_handlers[type];
 
    r->pipe->delete_vs_state(r->pipe, handler->vs);
    r->pipe->delete_fs_state(r->pipe, handler->fs);
    r->pipe->delete_vertex_elements_state(r->pipe, handler->vertex_elems_state);
+
+   /* free the staging arrays MALLOCed by init_mbtype_handler (do not reallocate first) */
+   FREE(handler->interlaced);
+
+   for (i = 0; i < 4 /*TODO: ref_frames * mv_per_frame */; ++i)
+      FREE(handler->mv[i]);
 }
 
 
@@ -493,7 +522,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    r->macroblocks_per_batch =
       mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
    r->num_macroblocks = 0;
-   r->macroblock_buf = MALLOC(r->macroblocks_per_batch * sizeof(struct pipe_mpeg12_macroblock));
 
    memset(&template, 0, sizeof(struct pipe_resource));
    template.target = PIPE_TEXTURE_2D;
@@ -647,8 +675,6 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
 
    for(i = 0; i<VL_NUM_MACROBLOCK_TYPES; ++i)
       cleanup_mbtype_handler(r, i);
-
-   FREE(r->macroblock_buf);
 }
 
 static enum VL_MACROBLOCK_TYPE
@@ -676,156 +702,22 @@ get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
    return -1;
 }
 
-void
-gen_macroblock_verts(struct vl_mpeg12_mc_renderer *r, 
-                     struct pipe_mpeg12_macroblock *mb, unsigned pos,
-                     struct vertex2f *ycbcr_vb, float *interlaced_vb, struct vertex2f **mv_vb)
-{
-   struct vertex2f mo_vec[2];
-
-   unsigned i;
-
-   assert(r);
-   assert(mb);
-   assert(ycbcr_vb);
-
-   mo_vec[1].x = 0;
-   mo_vec[1].y = 0;
-
-   switch (mb->mb_type) {
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-      {
-         struct vertex2f *vb[2];
-
-         assert(mv_vb && mv_vb[2] && mv_vb[3]);
-
-         vb[0] = mv_vb[2] + pos;
-         vb[1] = mv_vb[3] + pos;
-
-         mo_vec[0].x = mb->pmv[0][1][0];
-         mo_vec[0].y = mb->pmv[0][1][1];
-
-         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            for (i = 0; i < 4; ++i) {
-               vb[0][i].x = mo_vec[0].x;
-               vb[0][i].y = mo_vec[0].y;
-            }
-         }
-         else {
-            mo_vec[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
-
-            mo_vec[1].x = mb->pmv[1][1][0];
-            mo_vec[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
-
-            if(mb->mvfs[0][1]) mo_vec[0].y += 2;
-            if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
-
-            for (i = 0; i < 4; ++i) {
-               vb[0][i].x = mo_vec[0].x;
-               vb[0][i].y = mo_vec[0].y;
-               vb[1][i].x = mo_vec[1].x;
-               vb[1][i].y = mo_vec[1].y;
-            }
-         }
-
-         /* fall-through */
-      }
-      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-      {
-         struct vertex2f *vb[2];
-
-         assert(mv_vb && mv_vb[0] && mv_vb[1]);
-
-         vb[0] = mv_vb[0] + pos;
-         vb[1] = mv_vb[1] + pos;
-
-         if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
-            mo_vec[0].x = mb->pmv[0][1][0];
-            mo_vec[0].y = mb->pmv[0][1][1];
-
-            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
-               mo_vec[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
-
-               mo_vec[1].x = mb->pmv[1][1][0];
-               mo_vec[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
-
-               if(mb->mvfs[0][1]) mo_vec[0].y += 2;
-               if(!mb->mvfs[1][1]) mo_vec[1].y -= 2;
-            }
-         }
-         else {
-            mo_vec[0].x = mb->pmv[0][0][0];
-            mo_vec[0].y = mb->pmv[0][0][1];
-
-            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FIELD) {
-               mo_vec[0].y = mb->pmv[0][0][1] - (mb->pmv[0][0][1] % 4);
-
-               mo_vec[1].x = mb->pmv[1][0][0];
-               mo_vec[1].y = mb->pmv[1][0][1] - (mb->pmv[1][0][1] % 4);
-
-               if(mb->mvfs[0][0]) mo_vec[0].y += 2;
-               if(!mb->mvfs[1][0]) mo_vec[1].y -= 2;
-            }
-         }
-
-         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            for (i = 0; i < 4; ++i) {
-               vb[0][i].x = mo_vec[0].x;
-               vb[0][i].y = mo_vec[0].y;
-            }
-         }
-         else {
-            for (i = 0; i < 4; ++i) {
-               vb[0][i].x = mo_vec[0].x;
-               vb[0][i].y = mo_vec[0].y;
-               vb[1][i].x = mo_vec[1].x;
-               vb[1][i].y = mo_vec[1].y;
-            }
-         }
-
-         /* fall-through */
-      }
-      case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
-      {
-         for ( i = 0; i < 4; ++i ) {
-            ycbcr_vb[i + pos].x = mb->mbx;
-            ycbcr_vb[i + pos].y = mb->mby;
-            interlaced_vb[i + pos] = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
-         }
-
-         break;
-      }
-      default:
-         assert(0);
-   }
-}
-
 static void
-gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
-                      unsigned *num_macroblocks)
+upload_vertex_stream(struct vl_mpeg12_mc_renderer *r,
+                      unsigned num_macroblocks[VL_NUM_MACROBLOCK_TYPES])
 {
-   unsigned offset[VL_NUM_MACROBLOCK_TYPES];
-   struct vertex2f *ycbcr_vb;
-   float *interlaced_vb;
-   struct vertex2f *mv_vb[4];
+   struct quadf *pos;
+   struct vertex2f *mv[4];
+   float *interlaced;
+
    struct pipe_transfer *buf_transfer[7];
-   unsigned i;
+
+   unsigned i, j;
 
    assert(r);
    assert(num_macroblocks);
 
-   for (i = 0; i < r->num_macroblocks; ++i) {
-      enum VL_MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
-      ++num_macroblocks[mb_type];
-   }
-
-   offset[0] = 0;
-
-   for (i = 1; i < VL_NUM_MACROBLOCK_TYPES; ++i)
-      offset[i] = offset[i - 1] + num_macroblocks[i - 1];
-
-   ycbcr_vb = (struct vertex2f *)pipe_buffer_map
+   pos = (struct quadf *)pipe_buffer_map
    (
       r->pipe,
       r->vertex_bufs.individual.ycbcr.buffer,
@@ -833,7 +725,7 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
       &buf_transfer[0]
    );
 
-   interlaced_vb = (float *)pipe_buffer_map
+   interlaced = (float *)pipe_buffer_map
    (
       r->pipe,
       r->vertex_bufs.individual.interlaced.buffer,
@@ -842,7 +734,7 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
    );
 
    for (i = 0; i < 4; ++i)
-      mv_vb[i] = (struct vertex2f *)pipe_buffer_map
+      mv[i] = (struct vertex2f *)pipe_buffer_map
       (
          r->pipe,
          r->vertex_bufs.individual.mv[i].buffer,
@@ -850,13 +742,21 @@ gen_macroblock_stream(struct vl_mpeg12_mc_renderer *r,
          &buf_transfer[i + 2]
       );
 
-   for (i = 0; i < r->num_macroblocks; ++i) {
-      enum VL_MACROBLOCK_TYPE mb_type = get_macroblock_type(&r->macroblock_buf[i]);
+   for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) {
+      struct vl_mc_mbtype_handler *handler = &r->mbtype_handlers[i];
+      unsigned count = vl_vb_upload(&handler->pos, pos);
+      if (count > 0) {
+         pos += count;
 
-      gen_macroblock_verts(r, &r->macroblock_buf[i], offset[mb_type] * 4,
-                           ycbcr_vb, interlaced_vb, mv_vb);
+         memcpy(interlaced, handler->interlaced, sizeof(float) * count * 4);
+         interlaced += count * 4;
 
-      ++offset[mb_type];
+         for (j = 0; j < 4 /* TODO */; ++j) {
+            memcpy(mv[j], handler->mv[j], sizeof(struct vertex2f) * count * 4);
+            mv[j] += count * 4;
+         }
+      }
+      num_macroblocks[i] = count;
    }
 
    pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
@@ -952,7 +852,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
    vl_idct_flush(&r->idct_cr);
    vl_idct_flush(&r->idct_cb);
 
-   gen_macroblock_stream(r, num_macroblocks);
+   upload_vertex_stream(r, num_macroblocks);
 
    r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
    r->pipe->set_viewport_state(r->pipe, &r->viewport);
@@ -990,6 +890,104 @@ update_render_target(struct vl_mpeg12_mc_renderer *r)
    r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, r->vs_const_buf);
 }
 
+static void
+get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4])
+{
+   switch (mb->mb_type) {
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
+      {
+         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+
+            mv[2].x = mb->pmv[0][1][0];
+            mv[2].y = mb->pmv[0][1][1];
+
+         } else {
+            mv[2].x = mb->pmv[0][1][0];
+            mv[2].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
+
+            mv[3].x = mb->pmv[1][1][0];
+            mv[3].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
+
+            if(mb->mvfs[0][1]) mv[2].y += 2;
+            if(!mb->mvfs[1][1]) mv[3].y -= 2;
+         }
+
+         /* fall-through */
+      }
+      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
+      {
+         if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
+
+            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+               mv[0].x = mb->pmv[0][1][0];
+               mv[0].y = mb->pmv[0][1][1];
+
+            } else {
+               mv[0].x = mb->pmv[0][1][0];
+               mv[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
+
+               mv[1].x = mb->pmv[1][1][0];
+               mv[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
+
+               if(mb->mvfs[0][1]) mv[0].y += 2;
+               if(!mb->mvfs[1][1]) mv[1].y -= 2;
+            }
+
+         } else {
+
+            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+               mv[0].x = mb->pmv[0][0][0];
+               mv[0].y = mb->pmv[0][0][1];
+
+            } else {
+               mv[0].x = mb->pmv[0][0][0];
+               mv[0].y = mb->pmv[0][0][1] - (mb->pmv[0][0][1] % 4);
+
+               mv[1].x = mb->pmv[1][0][0];
+               mv[1].y = mb->pmv[1][0][1] - (mb->pmv[1][0][1] % 4);
+
+               if(mb->mvfs[0][0]) mv[0].y += 2;
+               if(!mb->mvfs[1][0]) mv[1].y -= 2;
+            }
+         }
+      }
+      default:
+         break;
+   }
+}
+
+static void
+grab_vectors(struct vl_mpeg12_mc_renderer *r,
+             struct pipe_mpeg12_macroblock *mb)
+{
+   enum VL_MACROBLOCK_TYPE type;
+   struct vl_mc_mbtype_handler *handler;
+   struct vertex2f mv[4] = { { 0.0f, 0.0f } }; /* zeroed: get_motion_vectors fills only some entries */
+   unsigned ref_frames, mv_per_frame;
+   unsigned i, j, pos;
+
+   assert(r);
+   assert(mb);
+
+   type = get_macroblock_type(mb);
+
+   ref_frames = const_mbtype_config[type][0];
+   mv_per_frame = const_mbtype_config[type][1];
+
+   handler = &r->mbtype_handlers[type];
+
+   pos = handler->pos.num_blocks * 4;
+   vl_vb_add_block(&handler->pos, false, mb->mbx, mb->mby);
+
+   get_motion_vectors(mb, mv);
+   for ( i = 0; i < 4; ++i ) {
+      handler->interlaced[i + pos] = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
+      for ( j = 0; j < 4 /*TODO: ref_frames * mv_per_frame */; ++j )
+         handler->mv[j][i + pos] = mv[j];
+   }
+}
+
 static void
 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
             enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
@@ -1030,9 +1028,7 @@ grab_macroblock(struct vl_mpeg12_mc_renderer *r,
    assert(mb->blocks);
    assert(r->num_macroblocks < r->macroblocks_per_batch);
 
-   memcpy(&r->macroblock_buf[r->num_macroblocks], mb,
-          sizeof(struct pipe_mpeg12_macroblock));
-
+   grab_vectors(r, mb);
    grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
 
    ++r->num_macroblocks;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index fd310e6ac3e..c6e9ef880ef 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -33,6 +33,7 @@
 #include <pipe/p_video_state.h>
 #include "vl_types.h"
 #include "vl_idct.h"
+#include "vl_vertex_buffers.h"
 
 struct pipe_context;
 struct pipe_macroblock;
@@ -60,8 +61,12 @@ enum VL_MACROBLOCK_TYPE
 
 struct vl_mc_mbtype_handler
 {
-   void *vertex_elems_state;
    void *vs, *fs;
+   void *vertex_elems_state;
+
+   struct vl_vertex_buffer pos;
+   float *interlaced;
+   struct vertex2f *mv[4];
 };
 
 struct vl_mpeg12_mc_renderer
@@ -111,7 +116,6 @@ struct vl_mpeg12_mc_renderer
    struct pipe_surface *surface, *past, *future;
    struct pipe_fence_handle **fence;
    unsigned num_macroblocks;
-   struct pipe_mpeg12_macroblock *macroblock_buf;
 
    struct keymap *texview_map;
 };
-- 
cgit v1.2.3


From 69f53c3dc89944c6a089e5d2b7a643baef9339e1 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 29 Nov 2010 01:02:37 +0100
Subject: copy only the motion vectors really needed to the vertex buffer

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 25 +++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 9224677a343..c219c050a1b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -121,13 +121,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    for (i = 0; i < ref_frames; ++i) {
       for (j = 0; j < 2; ++j) {        
         if(j < mv_per_frame) {
-           vmv[count] = ureg_DECL_vs_input(shader, VS_I_MV0 + i * 2 + j);
+           vmv[count] = ureg_DECL_vs_input(shader, VS_I_MV0 + count);
            o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + count);
            count++;
         }
-        /* workaround for r600g */
-        else if(ref_frames == 2)
-           ureg_DECL_vs_input(shader, VS_I_MV0 + i * 2 + j);
       }
    }
 
@@ -389,7 +386,7 @@ init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE typ
    if (handler->interlaced == NULL)
       return false;
 
-   for (i = 0; i < 4 /*TODO: ref_frames * mv_per_frame */; ++i) {
+   for (i = 0; i < ref_frames * mv_per_frame; ++i) {
       handler->mv[i] = MALLOC(sizeof(struct vertex2f) * r->macroblocks_per_batch * 4);
       if (handler->mv[i] == NULL)
          return false;
@@ -419,7 +416,7 @@ cleanup_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE
    handler->interlaced = MALLOC(sizeof(float) * r->macroblocks_per_batch * 4);
    FREE(handler->interlaced);
 
-   for (i = 0; i < 4 /*TODO: ref_frames * mv_per_frame */; ++i)
+   for (i = 0; i < ref_frames * mv_per_frame; ++i)
       FREE(handler->mv[i]);
 }
 
@@ -746,15 +743,21 @@ upload_vertex_stream(struct vl_mpeg12_mc_renderer *r,
       struct vl_mc_mbtype_handler *handler = &r->mbtype_handlers[i];
       unsigned count = vl_vb_upload(&handler->pos, pos);
       if (count > 0) {
+         unsigned ref_frames, mv_per_frame;
+
+         ref_frames = const_mbtype_config[i][0];
+         mv_per_frame = const_mbtype_config[i][1];
+
          pos += count;
 
          memcpy(interlaced, handler->interlaced, sizeof(float) * count * 4);
          interlaced += count * 4;
 
-         for (j = 0; j < 4 /* TODO */; ++j) {
+         for (j = 0; j < ref_frames * mv_per_frame; ++j)
             memcpy(mv[j], handler->mv[j], sizeof(struct vertex2f) * count * 4);
+
+         for (j = 0; j < 4; ++j)
             mv[j] += count * 4;
-         }
       }
       num_macroblocks[i] = count;
    }
@@ -898,8 +901,8 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4])
       {
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
 
-            mv[2].x = mb->pmv[0][1][0];
-            mv[2].y = mb->pmv[0][1][1];
+            mv[1].x = mb->pmv[0][1][0];
+            mv[1].y = mb->pmv[0][1][1];
 
          } else {
             mv[2].x = mb->pmv[0][1][0];
@@ -983,7 +986,7 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
    get_motion_vectors(mb, mv);
    for ( i = 0; i < 4; ++i ) {
       handler->interlaced[i + pos] = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
-      for ( j = 0; j < 4 /*TODO: ref_frames * mv_per_frame */; ++j )
+      for ( j = 0; j < ref_frames * mv_per_frame; ++j )
          handler->mv[j][i + pos] = mv[j];
    }
 }
-- 
cgit v1.2.3


From 74c71f09f3d321963b738acfb0bfd30b1e1efaeb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 30 Nov 2010 00:14:59 +0100
Subject: move to four component calculation for idct code

---
 src/gallium/auxiliary/vl/vl_idct.c | 122 ++++++++++++++++---------------------
 1 file changed, 52 insertions(+), 70 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 22feff8d8d4..80894b49bbb 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -137,56 +137,15 @@ create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
    return ureg_create_shader_and_destroy(shader, idct->pipe);
 }
 
-static void
-fetch_one(struct ureg_program *shader, struct ureg_dst m[2],
-          struct ureg_src tc, struct ureg_src sampler,
-          struct ureg_src start, struct ureg_src block, float height)
-{
-   struct ureg_dst t_tc, tmp;
-   unsigned i, j;
-
-   t_tc = ureg_DECL_temporary(shader);
-   tmp = ureg_DECL_temporary(shader);
-
-   m[0] = ureg_DECL_temporary(shader);
-   m[1] = ureg_DECL_temporary(shader);
-
-   /*
-    * t_tc.x = right_side ? start.x : tc.x
-    * t_tc.y = right_side ? tc.y : start.y
-    * m[0..1].xyzw = tex(t_tc++, sampler)
-    */
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(tc, TGSI_SWIZZLE_X));
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(start, TGSI_SWIZZLE_Y));
-
-#if NR_RENDER_TARGETS == 8
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X));
-#else
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
-#endif
-
-   for(i = 0; i < 2; ++i) {
-      for(j = 0; j < 4; ++j) {
-         /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
-         ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
-         ureg_MOV(shader, ureg_writemask(m[i], TGSI_WRITEMASK_X << j), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
-
-         if(i != 1 || j != 3) /* skip the last add */
-            ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y),
-               ureg_src(t_tc), ureg_imm1f(shader, 1.0f / height));
-      }
-   }
-
-   ureg_release_temporary(shader, t_tc);
-   ureg_release_temporary(shader, tmp);
-}
-
 static void
 fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
            struct ureg_src tc, struct ureg_src sampler,
-           struct ureg_src start, bool right_side, float width)
+           struct ureg_src start, struct ureg_src block,
+           bool right_side, bool transposed, float size)
 {
    struct ureg_dst t_tc;
+   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
+   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
 
    t_tc = ureg_DECL_temporary(shader);
    m[0] = ureg_DECL_temporary(shader);
@@ -197,17 +156,23 @@ fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
     * t_tc.y = right_side ? tc.y : start.y
     * m[0..1] = tex(t_tc++, sampler)
     */
-   if(right_side) {
-      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_Y));
-      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_X));
+   if(!right_side) {
+      ureg_MOV(shader, ureg_writemask(t_tc, wm_start), ureg_scalar(start, TGSI_SWIZZLE_X));
+      ureg_MOV(shader, ureg_writemask(t_tc, wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_Y));
    } else {
-      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_scalar(start, TGSI_SWIZZLE_X));
-      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), ureg_scalar(tc, TGSI_SWIZZLE_Y));
+      ureg_MOV(shader, ureg_writemask(t_tc, wm_start), ureg_scalar(start, TGSI_SWIZZLE_Y));
+      ureg_MOV(shader, ureg_writemask(t_tc, wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_X));
    }
 
-   ureg_TEX(shader, m[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
-   ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / width));
-   ureg_TEX(shader, m[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler);
+#if NR_RENDER_TARGETS == 8
+   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X));
+#else
+   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
+#endif
+
+   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
+   ureg_ADD(shader, ureg_writemask(t_tc, wm_start), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / size));
+   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
 
    ureg_release_temporary(shader, t_tc);
 }
@@ -228,7 +193,9 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2
     */
    ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
    ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(l[1]), ureg_src(r[1]));
-   ureg_ADD(shader, dst, ureg_src(tmp[0]), ureg_src(tmp[1]));
+   ureg_ADD(shader, dst,
+      ureg_scalar(ureg_src(tmp[0]), TGSI_SWIZZLE_X),
+      ureg_scalar(ureg_src(tmp[1]), TGSI_SWIZZLE_X));
 
    for(i = 0; i < 2; ++i) {
       ureg_release_temporary(shader, tmp[i]);
@@ -262,8 +229,8 @@ create_transpose_frag_shader(struct vl_idct *idct)
    start[0] = ureg_imm1f(shader, 0.0f);
    start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
-   fetch_four(shader, m[0], block, sampler[0], start[0], false, transpose->width0);
-   fetch_one(shader, m[1], tex, sampler[1], start[1], block, intermediate->height0);
+   fetch_four(shader, m[0], block, sampler[0], start[0], block, false, false, transpose->width0);
+   fetch_four(shader, m[1], tex, sampler[1], start[1], block, true, false, intermediate->height0);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -290,13 +257,13 @@ create_matrix_frag_shader(struct vl_idct *idct)
 
    struct ureg_program *shader;
 
-   struct ureg_src tc[2], sampler[2];
+   struct ureg_src tex, block, sampler[2];
    struct ureg_src start[2];
 
-   struct ureg_dst l[2], r[2];
+   struct ureg_dst l[4][2], r[2];
    struct ureg_dst t_tc, tmp, fragment[NR_RENDER_TARGETS];
 
-   unsigned i;
+   unsigned i, j;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -305,8 +272,8 @@ create_matrix_frag_shader(struct vl_idct *idct)
    t_tc = ureg_DECL_temporary(shader);
    tmp = ureg_DECL_temporary(shader);
 
-   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
-   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
+   tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
+   block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
 
    sampler[0] = ureg_DECL_sampler(shader, 1);
    sampler[1] = ureg_DECL_sampler(shader, 0);
@@ -317,30 +284,44 @@ create_matrix_frag_shader(struct vl_idct *idct)
    for (i = 0; i < NR_RENDER_TARGETS; ++i)
        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
 
-   fetch_four(shader, l, tc[0], sampler[0], start[0], false, source->width0);
-   ureg_MUL(shader, l[0], ureg_src(l[0]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
-   ureg_MUL(shader, l[1], ureg_src(l[1]), ureg_scalar(ureg_imm1f(shader, STAGE1_SCALE), TGSI_SWIZZLE_X));
+   /* pixel center is at 0.5 not 0.0 !!! */
+   ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 
+      tex, ureg_imm1f(shader, -2.0f / source->height0));
+
+   for (i = 0; i < 4; ++i) {
+      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], block, false, false, source->width0);
+      ureg_MUL(shader, l[i][0], ureg_src(l[i][0]), ureg_imm1f(shader, STAGE1_SCALE));
+      ureg_MUL(shader, l[i][1], ureg_src(l[i][1]), ureg_imm1f(shader, STAGE1_SCALE));
+      if(i != 3)
+         ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 
+            ureg_src(t_tc), ureg_imm1f(shader, 1.0f / source->height0));
+   }
    
    for (i = 0; i < NR_RENDER_TARGETS; ++i) {
 
 #if NR_RENDER_TARGETS == 8
       ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i));
-      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, matrix->width0);
+      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], block, true, true, matrix->width0);
 #elif NR_RENDER_TARGETS == 1
-      fetch_four(shader, r, tc[1], sampler[1], start[1], true, matrix->width0);
+      fetch_four(shader, r, block, sampler[1], start[1], block, true, true, matrix->width0);
 #else
 #error invalid number of render targets
 #endif
 
-      matrix_mul(shader, fragment[i], l, r);
+      for (j = 0; j < 4; ++j) {
+         matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
+      }
       ureg_release_temporary(shader, r[0]);
       ureg_release_temporary(shader, r[1]);
    }
 
    ureg_release_temporary(shader, t_tc);
    ureg_release_temporary(shader, tmp);
-   ureg_release_temporary(shader, l[0]);
-   ureg_release_temporary(shader, l[1]);
+
+   for (i = 0; i < 4; ++i) {
+      ureg_release_temporary(shader, l[i][0]);
+      ureg_release_temporary(shader, l[i][1]);
+   }
 
    ureg_END(shader);
 
@@ -420,8 +401,9 @@ init_buffers(struct vl_idct *idct)
    idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
    template.target = PIPE_TEXTURE_3D;
-   template.format = PIPE_FORMAT_R16_SNORM;
+   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
    template.width0 = idct->destination->width0 / NR_RENDER_TARGETS;
+   template.height0 = idct->destination->height0 / 4;
    template.depth0 = NR_RENDER_TARGETS;
    template.usage = PIPE_USAGE_STATIC;
    idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
-- 
cgit v1.2.3


From c8b7cf469fa9db9e7ee459eec2d61f485e7961b8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 30 Nov 2010 20:41:26 +0100
Subject: cleanup and use CMP instead of IF ELSE ENDIF

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 30 ++++++++----------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index c219c050a1b..13894bba5a8 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -68,7 +68,6 @@ enum VS_OUTPUT
    VS_O_TEX0,
    VS_O_TEX1,
    VS_O_TEX2,
-   VS_O_INTERLACED,
    VS_O_MV0,
    VS_O_MV1,
    VS_O_MV2,
@@ -92,7 +91,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    struct ureg_src norm, mbs;
    struct ureg_src vrect, vpos, interlaced, vmv[4];
    struct ureg_dst scale, t_vpos, t_vtex;
-   struct ureg_dst o_vpos, o_line, o_vtex[3], o_interlaced, o_vmv[4];
+   struct ureg_dst o_vpos, o_line, o_vtex[3], o_vmv[4];
    unsigned i, j, count, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -115,7 +114,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
    o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
    o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
-   o_interlaced = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED);
    
    count=0;
    for (i = 0; i < ref_frames; ++i) {
@@ -153,7 +151,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
     *
     * if(count > 0) { // Apply motion vectors
     *    scale = norm * 0.5;
-    *    o_vmv[0..count] = t_vpos + vmv[0..4] * scale
+    *    o_vmv[0..count] = t_vpos + vmv[0..count] * scale
     * }
     *
     */
@@ -184,8 +182,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_ENDIF(shader);
    ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
-   ureg_MOV(shader, o_interlaced, interlaced);
-
    if(count > 0) {
       ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
       for (i = 0; i < count; ++i)
@@ -213,8 +209,8 @@ calc_field(struct ureg_program *shader)
    /*
     * line going from 0 to 8 in steps of 0.5
     *
-    * tmp.z = fraction(line.y)
-    * tmp.z = tmp.z >= 0.5 ? 1 : 0
+    * tmp.y = fraction(line.y)
+    * tmp.y = tmp.y >= 0.5 ? 1 : 0
     */
    ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
@@ -225,10 +221,10 @@ calc_field(struct ureg_program *shader)
 static struct ureg_dst
 fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src tc[3], interlaced;
+   struct ureg_src tc[3];
    struct ureg_src sampler[3];
    struct ureg_dst texel, t_tc, tmp;
-   unsigned i, label;
+   unsigned i;
 
    texel = ureg_DECL_temporary(shader);
    t_tc = ureg_DECL_temporary(shader);
@@ -238,27 +234,21 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
    tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
 
-   interlaced = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED, TGSI_INTERPOLATE_CONSTANT);
-
    for (i = 0; i < 3; ++i)  {
       sampler[i] = ureg_DECL_sampler(shader, i);
    }
 
    /*
-    * texel.y  = tex(field ? tc[1] : tc[0], sampler[0])
+    * texel.y  = tex(field.y ? tc[1] : tc[0], sampler[0])
     * texel.cb = tex(tc[2], sampler[1])
     * texel.cr = tex(tc[2], sampler[2])
     */
-   ureg_MUL(shader, tmp, interlaced, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y));
-
    for (i = 0; i < 3; ++i) {
       if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
 
-         ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), &label);
-            ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[1]);
-         ureg_ELSE(shader, &label);
-            ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[0]);
-         ureg_ENDIF(shader);
+         ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1], tc[0]);
 
       } else {
          ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[2]);
-- 
cgit v1.2.3


From 27016941bca40a6563dd2122369745351102bc0c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 30 Nov 2010 21:15:47 +0100
Subject: use CMP also for reference frame fetch

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 114 ++++++++++++++---------
 1 file changed, 71 insertions(+), 43 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 13894bba5a8..70bb7564131 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -265,21 +265,13 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
    return texel;
 }
 
-static void *
-create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
+static struct ureg_dst
+fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frames, unsigned mv_per_frame)
 {
-   struct ureg_program *shader;
-   struct ureg_src tc[ref_frames * mv_per_frame], sampler[ref_frames], result;
-   struct ureg_dst field, texel, ref[ref_frames];
-   struct ureg_dst fragment;
+   struct ureg_src tc[ref_frames * mv_per_frame], sampler[ref_frames];
+   struct ureg_dst ref[ref_frames], t_tc, result;
    unsigned i, label;
 
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
-
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
    for (i = 0; i < ref_frames * mv_per_frame; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
 
@@ -288,53 +280,89 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
       ref[i] = ureg_DECL_temporary(shader);
    }
 
-   field = calc_field(shader);
-   texel = fetch_ycbcr(r, shader, field);
+   result = ureg_DECL_temporary(shader);
 
-   switch(ref_frames) {
-   case 0:
-      result = ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X);
-      break;
-
-   case 1:
+   if (ref_frames == 1) {
       if(mv_per_frame == 1)
-         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
+         /*
+          * result = tex(tc[0], sampler[0])
+          */
+         ureg_TEX(shader, result, TGSI_TEXTURE_2D, tc[0], sampler[0]);
       else {
-         ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
-            ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
-         ureg_ELSE(shader, &label);
-            ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
-         ureg_ENDIF(shader);
+         t_tc = ureg_DECL_temporary(shader);
+         /*
+          * result = tex(field.y ? tc[1] : tc[0], sampler[0])
+          */
+         ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1], tc[0]);
+         ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
+
+         ureg_release_temporary(shader, t_tc);
       }
-      result = ureg_src(ref[0]);
-      break;
 
-   case 2:
+   } else if (ref_frames == 2) {
       if(mv_per_frame == 1) {
+         /*
+          * ref[0..1] = tex(tc[0..1], sampler[0..1])
+          */
          ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
          ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
       } else {
-         ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &label);
-            ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[1], sampler[0]);
-            ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[3], sampler[1]);
-         ureg_ELSE(shader, &label);
-            ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
-            ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[2], sampler[1]);
-         ureg_ENDIF(shader);
-      }
+         t_tc = ureg_DECL_temporary(shader);
+
+         /*
+          * if (field.y)
+          *    ref[0..1] = tex(tc[0..1], sampler[0..1])
+          * else
+          *    ref[0..1] = tex(tc[2..3], sampler[0..1])
+          */
+         ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1], tc[0]);
+         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
 
-      ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
-      result = ureg_src(ref[0]);
-      break;
+         ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[3], tc[2]);
+         ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[1]);
 
-   default:
-      assert(0);
+         ureg_release_temporary(shader, t_tc);
+      }
+
+      ureg_LRP(shader, result, ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
    }
-   ureg_ADD(shader, fragment, ureg_src(texel), result);
 
    for (i = 0; i < ref_frames; ++i)
       ureg_release_temporary(shader, ref[i]);
 
+   return result;
+}
+
+static void *
+create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
+{
+   struct ureg_program *shader;
+   struct ureg_src result;
+   struct ureg_dst field, texel;
+   struct ureg_dst fragment;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   field = calc_field(shader);
+   texel = fetch_ycbcr(r, shader, field);
+
+   if (ref_frames == 0)
+      result = ureg_imm1f(shader, 0.5f);
+   else
+      result = ureg_src(fetch_ref(shader, field, ref_frames, mv_per_frame));
+
+   ureg_ADD(shader, fragment, ureg_src(texel), result);
+
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
    ureg_END(shader);
-- 
cgit v1.2.3


From 29840040af128fe3f3542defd9448e1f66c23f03 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 1 Dec 2010 18:47:11 +0100
Subject: add rasterizer state

---
 src/gallium/auxiliary/vl/vl_idct.c               | 19 ++++++++++++++++---
 src/gallium/auxiliary/vl/vl_idct.h               |  2 ++
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 16 ++++++++++++++--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  2 ++
 4 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 80894b49bbb..d9872cb0ea2 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -285,9 +285,9 @@ create_matrix_frag_shader(struct vl_idct *idct)
        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
 
    /* pixel center is at 0.5 not 0.0 !!! */
-   ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 
-      tex, ureg_imm1f(shader, -2.0f / source->height0));
-
+   //ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 
+   //   tex, ureg_imm1f(shader, -2.0f / source->height0));
+   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
    for (i = 0; i < 4; ++i) {
       fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], block, false, false, source->width0);
       ureg_MUL(shader, l[i][0], ureg_src(l[i][0]), ureg_imm1f(shader, STAGE1_SCALE));
@@ -472,6 +472,7 @@ static void
 init_state(struct vl_idct *idct)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_rasterizer_state rs_state;
    unsigned i;
 
    idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0;
@@ -528,6 +529,14 @@ init_state(struct vl_idct *idct)
       /*sampler.max_anisotropy = ; */
       idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
    }
+
+   memset(&rs_state, 0, sizeof(rs_state));
+   /*rs_state.sprite_coord_enable */
+   rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
+   rs_state.point_quad_rasterization = true;
+   rs_state.point_size = BLOCK_WIDTH;
+   rs_state.gl_rasterization_rules = false;
+   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
 }
 
 static void
@@ -543,6 +552,8 @@ cleanup_state(struct vl_idct *idct)
 
    for (i = 0; i < 4; ++i)
       idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
+
+   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
 }
 
 struct pipe_resource *
@@ -730,6 +741,8 @@ vl_idct_flush(struct vl_idct *idct)
 
    if(num_blocks > 0) {
 
+      idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
+
       /* first stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
       idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[0]);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 94a5c73977f..462863b0008 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -42,6 +42,8 @@ struct vl_idct
 
    struct pipe_resource *destination;
 
+   void *rs_state;
+
    void *vertex_elems_state;
 
    union
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 70bb7564131..5e928e894e7 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -270,7 +270,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frame
 {
    struct ureg_src tc[ref_frames * mv_per_frame], sampler[ref_frames];
    struct ureg_dst ref[ref_frames], t_tc, result;
-   unsigned i, label;
+   unsigned i;
 
    for (i = 0; i < ref_frames * mv_per_frame; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
@@ -443,6 +443,7 @@ static bool
 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_rasterizer_state rs_state;
    unsigned filters[5];
    unsigned i;
 
@@ -469,7 +470,7 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    /* Luma filter */
    filters[0] = PIPE_TEX_FILTER_NEAREST;
    /* Chroma filters */
-   if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || true) { //TODO
+   if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
       filters[1] = PIPE_TEX_FILTER_NEAREST;
       filters[2] = PIPE_TEX_FILTER_NEAREST;
    }
@@ -504,6 +505,14 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
       r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
    }
 
+   memset(&rs_state, 0, sizeof(rs_state));
+   /*rs_state.sprite_coord_enable */
+   rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
+   rs_state.point_quad_rasterization = true;
+   rs_state.point_size = BLOCK_WIDTH;
+   rs_state.gl_rasterization_rules = true;
+   r->rs_state = r->pipe->create_rasterizer_state(r->pipe, &rs_state);
+
    return true;
 }
 
@@ -516,6 +525,8 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
 
    for (i = 0; i < 5; ++i)
       r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
+
+   r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
 }
 
 static bool
@@ -875,6 +886,7 @@ flush(struct vl_mpeg12_mc_renderer *r)
 
    upload_vertex_stream(r, num_macroblocks);
 
+   r->pipe->bind_rasterizer_state(r->pipe, r->rs_state);
    r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
    r->pipe->set_viewport_state(r->pipe, &r->viewport);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index c6e9ef880ef..2b6c21aa5c2 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -85,6 +85,8 @@ struct vl_mpeg12_mc_renderer
 
    struct vl_idct idct_y, idct_cb, idct_cr;
 
+   void *rs_state;
+
    union
    {
       void *all[5];
-- 
cgit v1.2.3


From 9af3c243d958dd5b9802dda321ab980c83cb8cb8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 3 Dec 2010 18:57:23 +0100
Subject: move empty block handling back into mc for testing

---
 src/gallium/auxiliary/vl/vl_idct.c               |  43 +----
 src/gallium/auxiliary/vl/vl_idct.h               |   1 -
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 225 +++++++++++++++++------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  14 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     |  18 +-
 5 files changed, 190 insertions(+), 111 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index d9872cb0ea2..b65a808a592 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -284,9 +284,6 @@ create_matrix_frag_shader(struct vl_idct *idct)
    for (i = 0; i < NR_RENDER_TARGETS; ++i)
        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
 
-   /* pixel center is at 0.5 not 0.0 !!! */
-   //ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 
-   //   tex, ureg_imm1f(shader, -2.0f / source->height0));
    ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
    for (i = 0; i < 4; ++i) {
       fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], block, false, false, source->width0);
@@ -664,13 +661,6 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
       return false;
    }
 
-   if(!vl_vb_init(&idct->empty_blocks, idct->max_blocks)) {
-      vl_vb_cleanup(&idct->blocks);
-      cleanup_shaders(idct);
-      cleanup_buffers(idct);
-      return false;
-   }
-
    init_state(idct);
 
    xfer_buffers_map(idct);
@@ -682,7 +672,6 @@ void
 vl_idct_cleanup(struct vl_idct *idct)
 {
    vl_vb_cleanup(&idct->blocks);
-   vl_vb_cleanup(&idct->empty_blocks);
    cleanup_shaders(idct);
    cleanup_buffers(idct);
 
@@ -701,18 +690,13 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 
    assert(idct);
 
-   if(block) {
-      tex_pitch = idct->tex_transfer->stride / sizeof(short);
-      texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
+   tex_pitch = idct->tex_transfer->stride / sizeof(short);
+   texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
 
-      for (i = 0; i < BLOCK_HEIGHT; ++i)
-         memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
+   for (i = 0; i < BLOCK_HEIGHT; ++i)
+      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
 
-      vl_vb_add_block(&idct->blocks, false, x, y);
-   } else {
-
-      vl_vb_add_block(&idct->empty_blocks, true, x, y);
-   }
+   vl_vb_add_block(&idct->blocks, x, y);
 }
 
 void
@@ -720,7 +704,7 @@ vl_idct_flush(struct vl_idct *idct)
 {
    struct pipe_transfer *vec_transfer;
    struct quadf *vectors;
-   unsigned num_blocks, num_empty_blocks;
+   unsigned num_blocks;
 
    assert(idct);
 
@@ -733,7 +717,6 @@ vl_idct_flush(struct vl_idct *idct)
    );
 
    num_blocks = vl_vb_upload(&idct->blocks, vectors);
-   num_empty_blocks = vl_vb_upload(&idct->empty_blocks, vectors + num_blocks);
 
    pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, vec_transfer);
 
@@ -770,19 +753,5 @@ vl_idct_flush(struct vl_idct *idct)
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
    }
 
-   if(num_empty_blocks > 0) {
-
-      /* empty block handling */
-      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
-      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);
-
-      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
-      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-      idct->pipe->bind_vs_state(idct->pipe, idct->eb_vs);
-      idct->pipe->bind_fs_state(idct->pipe, idct->eb_fs);
-
-      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, num_blocks * 4, num_empty_blocks * 4);
-   }
-
    xfer_buffers_map(idct);
 }
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 462863b0008..3388d91f353 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -86,7 +86,6 @@ struct vl_idct
    } vertex_bufs;
 
    struct vl_vertex_buffer blocks;
-   struct vl_vertex_buffer empty_blocks;
 
    struct pipe_transfer *tex_transfer;
    short *texels;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 5e928e894e7..76db8c1b211 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -52,6 +52,10 @@ enum VS_INPUT
 {
    VS_I_RECT,
    VS_I_VPOS,
+   VS_I_EB_0_0,
+   VS_I_EB_0_1,
+   VS_I_EB_1_0,
+   VS_I_EB_1_1,
    VS_I_INTERLACED,
    VS_I_MV0,
    VS_I_MV1,
@@ -68,6 +72,11 @@ enum VS_OUTPUT
    VS_O_TEX0,
    VS_O_TEX1,
    VS_O_TEX2,
+   VS_O_EB_0_0,
+   VS_O_EB_0_1,
+   VS_O_EB_1_0,
+   VS_O_EB_1_1,
+   VS_O_INTERLACED,
    VS_O_MV0,
    VS_O_MV1,
    VS_O_MV2,
@@ -89,9 +98,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 {
    struct ureg_program *shader;
    struct ureg_src norm, mbs;
-   struct ureg_src vrect, vpos, interlaced, vmv[4];
+   struct ureg_src vrect, vpos, eb[2][2], interlaced, vmv[4];
    struct ureg_dst scale, t_vpos, t_vtex;
-   struct ureg_dst o_vpos, o_line, o_vtex[3], o_vmv[4];
+   struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2][2], o_interlaced, o_vmv[4];
    unsigned i, j, count, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -107,13 +116,22 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+   eb[0][0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0);
+   eb[1][0] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
+   eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
+   eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
    interlaced = ureg_DECL_vs_input(shader, VS_I_INTERLACED);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
-   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
+   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);   
    o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
    o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
-   o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
+   o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);   
+   o_eb[0][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_0);
+   o_eb[0][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_1);
+   o_eb[1][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_0);
+   o_eb[1][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_1);
+   o_interlaced = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED);
    
    count=0;
    for (i = 0; i < ref_frames; ++i) {
@@ -148,6 +166,8 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
     *    o_vtex[0..1].xy = t_vpos
     * }
     * o_vtex[2].xy = t_vpos
+    * o_eb[0..1][0..1] = eb[0..1][0..1]
+    * o_interlaced = interlaced
     *
     * if(count > 0) { // Apply motion vectors
     *    scale = norm * 0.5;
@@ -182,6 +202,13 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_ENDIF(shader);
    ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
+   ureg_MOV(shader, o_eb[0][0], eb[0][0]);
+   ureg_MOV(shader, o_eb[0][1], eb[0][1]);
+   ureg_MOV(shader, o_eb[1][0], eb[1][0]);
+   ureg_MOV(shader, o_eb[1][1], eb[1][1]);
+
+   ureg_MOV(shader, o_interlaced, interlaced);
+
    if(count > 0) {
       ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
       for (i = 0; i < count; ++i)
@@ -209,11 +236,13 @@ calc_field(struct ureg_program *shader)
    /*
     * line going from 0 to 8 in steps of 0.5
     *
-    * tmp.y = fraction(line.y)
-    * tmp.y = tmp.y >= 0.5 ? 1 : 0
+    * tmp.z = fraction(line.y)
+    * tmp.z = tmp.z >= 0.5 ? 1 : 0
+    * tmp.xy = line >= 4 ? 1 : 0
     */
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(line, TGSI_SWIZZLE_Y));
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line, ureg_imm2f(shader, BLOCK_WIDTH / 2, BLOCK_HEIGHT / 2));
 
    return tmp;
 }
@@ -221,19 +250,26 @@ calc_field(struct ureg_program *shader)
 static struct ureg_dst
 fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src tc[3];
-   struct ureg_src sampler[3];
-   struct ureg_dst texel, t_tc, tmp;
-   unsigned i;
+   struct ureg_src tc[3], sampler[3], eb[2][2], interlaced;
+   struct ureg_dst texel, t_tc, t_eb_info, tmp;
+   unsigned i, label, l_x, l_y;
 
    texel = ureg_DECL_temporary(shader);
    t_tc = ureg_DECL_temporary(shader);
+   t_eb_info = ureg_DECL_temporary(shader);
    tmp = ureg_DECL_temporary(shader);
 
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
    tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
 
+   eb[0][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_0, TGSI_INTERPOLATE_CONSTANT);
+   eb[0][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_1, TGSI_INTERPOLATE_CONSTANT);
+   eb[1][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_0, TGSI_INTERPOLATE_CONSTANT);
+   eb[1][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_1, TGSI_INTERPOLATE_CONSTANT);
+
+   interlaced = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED, TGSI_INTERPOLATE_CONSTANT);
+
    for (i = 0; i < 3; ++i)  {
       sampler[i] = ureg_DECL_sampler(shader, i);
    }
@@ -243,23 +279,52 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
     * texel.cb = tex(tc[2], sampler[1])
     * texel.cr = tex(tc[2], sampler[2])
     */
-   for (i = 0; i < 3; ++i) {
-      if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
 
-         ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
+   ureg_IF(shader, interlaced, &label);
+      ureg_MOV(shader, ureg_writemask(field, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z));
+   ureg_ENDIF(shader);
+
+   ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
             ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
             tc[1], tc[0]);
 
-      } else {
-         ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[2]);
-      }
+   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &l_y);
+
+      ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X), &l_x);
+         ureg_MOV(shader, t_eb_info, eb[1][1]);
+      ureg_ELSE(shader, &l_x);
+         ureg_MOV(shader, t_eb_info, eb[1][0]);
+      ureg_ENDIF(shader);
+
+   ureg_ELSE(shader, &l_y);
+
+      ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X), &l_x);
+         ureg_MOV(shader, t_eb_info, eb[0][1]);
+      ureg_ELSE(shader, &l_x);
+         ureg_MOV(shader, t_eb_info, eb[0][0]);
+      ureg_ENDIF(shader);
 
-      /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
-      ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]);
-      ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
+   ureg_ENDIF(shader);
+
+   for (i = 0; i < 3; ++i) {
+      ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), TGSI_SWIZZLE_X + i), &label);
+         ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_imm1f(shader, 0.0f));
+      ureg_ELSE(shader, &label);
+
+         /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
+         if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
+            ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]);
+         } else {
+            ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, tc[2], sampler[i]);
+         }
+
+         ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
+
+      ureg_ENDIF(shader);
    }
 
    ureg_release_temporary(shader, t_tc);
+   ureg_release_temporary(shader, t_eb_info);
    ureg_release_temporary(shader, tmp);
 
    return texel;
@@ -294,7 +359,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frame
           * result = tex(field.y ? tc[1] : tc[0], sampler[0])
           */
          ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
             tc[1], tc[0]);
          ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
 
@@ -318,12 +383,12 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frame
           *    ref[0..1] = tex(tc[2..3], sampler[0..1])
           */
          ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
             tc[1], tc[0]);
          ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
 
          ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
             tc[3], tc[2]);
          ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[1]);
 
@@ -392,7 +457,7 @@ init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE typ
       return false;
 
    handler->vertex_elems_state = r->pipe->create_vertex_elements_state(
-      r->pipe, 3 + ref_frames * mv_per_frame, vertex_elems);
+      r->pipe, 7 + ref_frames * mv_per_frame, vertex_elems);
 
    if (handler->vertex_elems_state == NULL)
       return false;
@@ -400,8 +465,8 @@ init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE typ
    if (!vl_vb_init(&handler->pos, r->macroblocks_per_batch))
       return false;
 
-   handler->interlaced = MALLOC(sizeof(float) * r->macroblocks_per_batch * 4);
-   if (handler->interlaced == NULL)
+   handler->info = MALLOC(sizeof(struct vertex_stream_0) * r->macroblocks_per_batch * 4);
+   if (handler->info == NULL)
       return false;
 
    for (i = 0; i < ref_frames * mv_per_frame; ++i) {
@@ -431,8 +496,7 @@ cleanup_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE
    r->pipe->delete_fs_state(r->pipe, handler->fs);
    r->pipe->delete_vertex_elements_state(r->pipe, handler->vertex_elems_state);
 
-   handler->interlaced = MALLOC(sizeof(float) * r->macroblocks_per_batch * 4);
-   FREE(handler->interlaced);
+   FREE(handler->info);
 
    for (i = 0; i < ref_frames * mv_per_frame; ++i)
       FREE(handler->mv[i]);
@@ -470,7 +534,7 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    /* Luma filter */
    filters[0] = PIPE_TEX_FILTER_NEAREST;
    /* Chroma filters */
-   if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
+   if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || true) { //TODO
       filters[1] = PIPE_TEX_FILTER_NEAREST;
       filters[2] = PIPE_TEX_FILTER_NEAREST;
    }
@@ -603,15 +667,15 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch
    );
 
-   r->vertex_bufs.individual.interlaced.stride = sizeof(float);
-   r->vertex_bufs.individual.interlaced.max_index = 4 * r->macroblocks_per_batch - 1;
-   r->vertex_bufs.individual.interlaced.buffer_offset = 0;
+   r->vertex_bufs.individual.info.stride = sizeof(struct vertex_stream_0);
+   r->vertex_bufs.individual.info.max_index = 4 * r->macroblocks_per_batch - 1;
+   r->vertex_bufs.individual.info.buffer_offset = 0;
    /* XXX: Create with usage DYNAMIC or STREAM */
-   r->vertex_bufs.individual.interlaced.buffer = pipe_buffer_create
+   r->vertex_bufs.individual.info.buffer = pipe_buffer_create
    (
       r->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      sizeof(float) * 4 * r->macroblocks_per_batch
+      sizeof(struct vertex_stream_0) * 4 * r->macroblocks_per_batch
    );
 
    for (i = 0; i < 4; ++i) {
@@ -641,8 +705,32 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[VS_I_VPOS].vertex_buffer_index = 1;
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
-   /* progressive=1.0f interlaced=0.0f */
-   vertex_elems[VS_I_INTERLACED].src_offset = 0;
+   /* y, cr, cb empty block element top left block */
+   vertex_elems[VS_I_EB_0_0].src_offset = 0;
+   vertex_elems[VS_I_EB_0_0].instance_divisor = 0;
+   vertex_elems[VS_I_EB_0_0].vertex_buffer_index = 2;
+   vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+
+   /* y, cr, cb empty block element top right block */
+   vertex_elems[VS_I_EB_0_1].src_offset = sizeof(float) * 3;
+   vertex_elems[VS_I_EB_0_1].instance_divisor = 0;
+   vertex_elems[VS_I_EB_0_1].vertex_buffer_index = 2;
+   vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+
+   /* y, cr, cb empty block element bottom left block */
+   vertex_elems[VS_I_EB_1_0].src_offset = sizeof(float) * 6;
+   vertex_elems[VS_I_EB_1_0].instance_divisor = 0;
+   vertex_elems[VS_I_EB_1_0].vertex_buffer_index = 2;
+   vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+
+   /* y, cr, cb empty block element bottom right block */
+   vertex_elems[VS_I_EB_1_1].src_offset = sizeof(float) * 9;
+   vertex_elems[VS_I_EB_1_1].instance_divisor = 0;
+   vertex_elems[VS_I_EB_1_1].vertex_buffer_index = 2;
+   vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+
+   /* progressive=0.0f interlaced=1.0f */
+   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(float) * 12;
    vertex_elems[VS_I_INTERLACED].instance_divisor = 0;
    vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 2;
    vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
@@ -733,8 +821,8 @@ upload_vertex_stream(struct vl_mpeg12_mc_renderer *r,
                       unsigned num_macroblocks[VL_NUM_MACROBLOCK_TYPES])
 {
    struct quadf *pos;
+   struct vertex_stream_0 *info;
    struct vertex2f *mv[4];
-   float *interlaced;
 
    struct pipe_transfer *buf_transfer[7];
 
@@ -751,10 +839,10 @@ upload_vertex_stream(struct vl_mpeg12_mc_renderer *r,
       &buf_transfer[0]
    );
 
-   interlaced = (float *)pipe_buffer_map
+   info = (struct vertex_stream_0 *)pipe_buffer_map
    (
       r->pipe,
-      r->vertex_bufs.individual.interlaced.buffer,
+      r->vertex_bufs.individual.info.buffer,
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &buf_transfer[1]
    );
@@ -779,8 +867,8 @@ upload_vertex_stream(struct vl_mpeg12_mc_renderer *r,
 
          pos += count;
 
-         memcpy(interlaced, handler->interlaced, sizeof(float) * count * 4);
-         interlaced += count * 4;
+         memcpy(info, handler->info, sizeof(struct vertex_stream_0) * count * 4);
+         info += count * 4;
 
          for (j = 0; j < ref_frames * mv_per_frame; ++j)
             memcpy(mv[j], handler->mv[j], sizeof(struct vertex2f) * count * 4);
@@ -792,7 +880,7 @@ upload_vertex_stream(struct vl_mpeg12_mc_renderer *r,
    }
 
    pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
-   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.interlaced.buffer, buf_transfer[1]);
+   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.info.buffer, buf_transfer[1]);
    for (i = 0; i < 4; ++i)
       pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.mv[i].buffer, buf_transfer[i + 2]);
 }
@@ -990,6 +1078,20 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4])
    }
 }
 
+static bool
+empty_block(enum pipe_video_chroma_format chroma_format,
+            unsigned cbp, unsigned component,
+            unsigned x, unsigned y)
+{
+   /* TODO: Implement 422, 444 */
+   assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+
+   if(component == 0) /*luma*/
+      return !(cbp  & (1 << (5 - (x + y * 2))));
+   else /*cr cb*/
+      return !(cbp & (1 << (2 - component)));
+}
+
 static void
 grab_vectors(struct vl_mpeg12_mc_renderer *r,
              struct pipe_mpeg12_macroblock *mb)
@@ -997,6 +1099,8 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
    enum VL_MACROBLOCK_TYPE type;
    struct vl_mc_mbtype_handler *handler;
    struct vertex2f mv[4];
+   struct vertex_stream_0 info;
+
    unsigned ref_frames, mv_per_frame;
    unsigned i, j, pos;
 
@@ -1011,11 +1115,20 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
    handler = &r->mbtype_handlers[type];
 
    pos = handler->pos.num_blocks * 4;
-   vl_vb_add_block(&handler->pos, false, mb->mbx, mb->mby);
+   vl_vb_add_block(&handler->pos, mb->mbx, mb->mby);
+
+   for ( i = 0; i < 2; ++i) {
+      for ( j = 0; j < 2; ++j) {
+         info.eb[i][j].y = empty_block(r->chroma_format, mb->cbp, 0, j, i);
+         info.eb[i][j].cr = empty_block(r->chroma_format, mb->cbp, 1, j, i);
+         info.eb[i][j].cb = empty_block(r->chroma_format, mb->cbp, 2, j, i);         
+      }
+   }
+   info.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
 
    get_motion_vectors(mb, mv);
    for ( i = 0; i < 4; ++i ) {
-      handler->interlaced[i + pos] = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
+      handler->info[i + pos] = info;
       for ( j = 0; j < ref_frames * mv_per_frame; ++j )
          handler->mv[j][i + pos] = mv[j];
    }
@@ -1033,22 +1146,24 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
 
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
-         bool eb = !(cbp  & (1 << (5 - tb)));
-         vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, eb ? NULL : blocks);
-         blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT;
+         if (!empty_block(r->chroma_format, cbp, 0, x, y)) {
+            vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, blocks);
+            blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
+         }
       }
    }
 
    /* TODO: Implement 422, 444 */
    assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
-   for (tb = 0; tb < 2; ++tb) {
-      bool eb = !(cbp & (1 << (1 - tb)));
-      if(tb == 0)
-         vl_idct_add_block(&r->idct_cb, mbx, mby, eb ? NULL : blocks);
-      else
-         vl_idct_add_block(&r->idct_cr, mbx, mby, eb ? NULL : blocks);
-      blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT;
+   for (tb = 1; tb < 3; ++tb) {
+      if (!empty_block(r->chroma_format, cbp, tb, 0, 0)) {
+         if(tb == 1)
+            vl_idct_add_block(&r->idct_cb, mbx, mby, blocks);
+         else
+            vl_idct_add_block(&r->idct_cr, mbx, mby, blocks);
+         blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
+      }
    }
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 2b6c21aa5c2..9195718750d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -59,13 +59,23 @@ enum VL_MACROBLOCK_TYPE
    VL_NUM_MACROBLOCK_TYPES
 };
 
+struct vertex_stream_0
+{
+   struct {
+      float y;
+      float cr;
+      float cb;
+   } eb[2][2];
+   float interlaced;
+};
+
 struct vl_mc_mbtype_handler
 {
    void *vs, *fs;
    void *vertex_elems_state;
 
    struct vl_vertex_buffer pos;
-   float *interlaced;
+   struct vertex_stream_0 *info;
    struct vertex2f *mv[4];
 };
 
@@ -111,7 +121,7 @@ struct vl_mpeg12_mc_renderer
    {
       struct pipe_vertex_buffer all[7];
       struct {
-         struct pipe_vertex_buffer quad, ycbcr, interlaced, mv[4];
+         struct pipe_vertex_buffer quad, ycbcr, info, mv[4];
       } individual;
    } vertex_bufs;
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 43ddc342d3d..fa4ec1dedd8 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -41,31 +41,17 @@ struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned
 
 bool vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks);
 
-static inline bool
-vl_vb_add_block(struct vl_vertex_buffer *buffer, bool allow_merge, signed x, signed y)
+static inline void
+vl_vb_add_block(struct vl_vertex_buffer *buffer, signed x, signed y)
 {
    struct quadf *quad;
 
    assert(buffer);
 
-   allow_merge &= buffer->num_blocks > 0;
-   if (allow_merge) {
-
-      quad = buffer->blocks + buffer->num_blocks - 1;
-      if(quad->tr.x == (x - 1) && quad->br.x == (x - 1) && 
-         quad->tr.y == y && quad->br.y == y) {
-
-         quad->tr.x = quad->br.x = x;
-         quad->tr.y = quad->br.y = y;
-         return true;
-      } 
-   }
-
    quad = buffer->blocks + buffer->num_blocks;
    quad->bl.x = quad->tl.x = quad->tr.x = quad->br.x = x;
    quad->bl.y = quad->tl.y = quad->tr.y = quad->br.y = y;
    buffer->num_blocks++;
-   return false;
 }
 
 unsigned vl_vb_upload(struct vl_vertex_buffer *buffer, struct quadf *dst);
-- 
cgit v1.2.3


From 8e0c05960daa7a38ab7834e6a9e7e0a7a973ac2d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 5 Dec 2010 16:46:10 +0100
Subject: [g3dvl] cleanup empty block handling

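Doing the empty block handling in the mc code is indeed faster, so remove
the leftover empty block shaders from the idct code and use a single
vertex shader for both idct stages.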
---
 src/gallium/auxiliary/vl/vl_idct.c | 92 ++++++++++----------------------------
 src/gallium/auxiliary/vl/vl_idct.h |  4 +-
 2 files changed, 26 insertions(+), 70 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index b65a808a592..a5b2053065e 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -47,11 +47,6 @@
 
 #define NR_RENDER_TARGETS 1
 
-struct vertex_shader_consts
-{
-   struct vertex4f norm;
-};
-
 enum VS_INPUT
 {
    VS_I_RECT,
@@ -80,7 +75,7 @@ static const float const_matrix[8][8] = {
 };
 
 static void *
-create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
+create_vert_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
    struct ureg_src scale;
@@ -98,6 +93,9 @@ create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
+   o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
+   o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
 
    /*
     * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
@@ -120,15 +118,9 @@ create_vert_shader(struct vl_idct *idct, bool calc_src_cords)
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
-   if(calc_src_cords) {
-      o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
-      o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
-      o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
-
-      ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
-      ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-      ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
-   }
+   ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
+   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
 
    ureg_release_temporary(shader, t_vpos);
 
@@ -213,7 +205,7 @@ create_transpose_frag_shader(struct vl_idct *idct)
    struct ureg_src block, tex, sampler[2];
    struct ureg_src start[2];
 
-   struct ureg_dst m[2][2];
+   struct ureg_dst l[2], r[2];
    struct ureg_dst tmp, fragment;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
@@ -229,20 +221,20 @@ create_transpose_frag_shader(struct vl_idct *idct)
    start[0] = ureg_imm1f(shader, 0.0f);
    start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
-   fetch_four(shader, m[0], block, sampler[0], start[0], block, false, false, transpose->width0);
-   fetch_four(shader, m[1], tex, sampler[1], start[1], block, true, false, intermediate->height0);
+   fetch_four(shader, l, block, sampler[0], start[0], block, false, false, transpose->width0);
+   fetch_four(shader, r, tex, sampler[1], start[1], block, true, false, intermediate->height0);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    tmp = ureg_DECL_temporary(shader);
-   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), m[0], m[1]);
+   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), l, r);
    ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
 
    ureg_release_temporary(shader, tmp);
-   ureg_release_temporary(shader, m[0][0]);
-   ureg_release_temporary(shader, m[0][1]);
-   ureg_release_temporary(shader, m[1][0]);
-   ureg_release_temporary(shader, m[1][1]);
+   ureg_release_temporary(shader, l[0]);
+   ureg_release_temporary(shader, l[1]);
+   ureg_release_temporary(shader, r[0]);
+   ureg_release_temporary(shader, r[1]);
 
    ureg_END(shader);
 
@@ -325,54 +317,25 @@ create_matrix_frag_shader(struct vl_idct *idct)
    return ureg_create_shader_and_destroy(shader, idct->pipe);
 }
 
-static void *
-create_empty_block_frag_shader(struct vl_idct *idct)
-{
-   struct ureg_program *shader;
-   struct ureg_dst fragment;
-
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
-
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
-   ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
-
-   ureg_END(shader);
-
-   return ureg_create_shader_and_destroy(shader, idct->pipe);
-}
-
 static bool
 init_shaders(struct vl_idct *idct)
 {
-   idct->matrix_vs = create_vert_shader(idct, true);
+   idct->vs = create_vert_shader(idct);
    idct->matrix_fs = create_matrix_frag_shader(idct);
-
-   idct->transpose_vs = create_vert_shader(idct, true);
    idct->transpose_fs = create_transpose_frag_shader(idct);
 
-   idct->eb_vs = create_vert_shader(idct, false);
-   idct->eb_fs = create_empty_block_frag_shader(idct);
-
    return 
-      idct->transpose_vs != NULL && idct->transpose_fs != NULL &&
-      idct->matrix_vs != NULL && idct->matrix_fs != NULL &&
-      idct->eb_vs != NULL && idct->eb_fs != NULL;
+      idct->vs != NULL &&
+      idct->transpose_fs != NULL &&
+      idct->matrix_fs != NULL;
 }
 
 static void
 cleanup_shaders(struct vl_idct *idct)
 {
-   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
-   idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
-
-   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
-
-   idct->pipe->delete_vs_state(idct->pipe, idct->eb_vs);
-   idct->pipe->delete_fs_state(idct->pipe, idct->eb_fs);
+   idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
 }
 
 static bool
@@ -725,31 +688,24 @@ vl_idct_flush(struct vl_idct *idct)
    if(num_blocks > 0) {
 
       idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
+      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
+      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
 
       /* first stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
       idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[0]);
-
-      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
-      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
-      idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
-
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
 
       /* second stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
       idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);
-
-      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
-      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
-      idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
-
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
    }
 
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 3388d91f353..ccaaf2653de 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -66,8 +66,8 @@ struct vl_idct
       } individual;
    } sampler_views;
 
-   void *matrix_vs, *transpose_vs, *eb_vs;
-   void *matrix_fs, *transpose_fs, *eb_fs;
+   void *vs;
+   void *matrix_fs, *transpose_fs;
 
    union
    {
-- 
cgit v1.2.3


From 57018734023b41ef84f5be560c12dce6ae5f1c58 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 5 Dec 2010 17:57:52 +0100
Subject: [g3dvl] move vertex handling into vl_vb object

---
 src/gallium/auxiliary/vl/vl_idct.c               |  19 ++--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 133 ++++++++++-------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  17 +--
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     |  19 ++--
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     |  23 ++--
 5 files changed, 94 insertions(+), 117 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index a5b2053065e..66bb140f02a 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -618,7 +618,7 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
       return false;
    }
 
-   if(!vl_vb_init(&idct->blocks, idct->max_blocks)) {
+   if(!vl_vb_init(&idct->blocks, idct->max_blocks, 2)) {
       cleanup_shaders(idct);
       cleanup_buffers(idct);
       return false;
@@ -646,6 +646,7 @@ vl_idct_cleanup(struct vl_idct *idct)
 void
 vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 {
+   struct vertex2f v;
    unsigned tex_pitch;
    short *texels;
 
@@ -659,15 +660,17 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
    for (i = 0; i < BLOCK_HEIGHT; ++i)
       memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
 
-   vl_vb_add_block(&idct->blocks, x, y);
+   v.x = x;
+   v.y = y;
+   vl_vb_add_block(&idct->blocks, (float*)&v);
 }
 
 void
 vl_idct_flush(struct vl_idct *idct)
 {
    struct pipe_transfer *vec_transfer;
-   struct quadf *vectors;
-   unsigned num_blocks;
+   void *vectors;
+   unsigned num_verts;
 
    assert(idct);
 
@@ -679,13 +682,13 @@ vl_idct_flush(struct vl_idct *idct)
       &vec_transfer
    );
 
-   num_blocks = vl_vb_upload(&idct->blocks, vectors);
+   num_verts = vl_vb_upload(&idct->blocks, vectors);
 
    pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, vec_transfer);
 
    xfer_buffers_unmap(idct);
 
-   if(num_blocks > 0) {
+   if(num_verts > 0) {
 
       idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
       idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
@@ -698,7 +701,7 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
-      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
+      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
 
       /* second stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
@@ -706,7 +709,7 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
-      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_blocks * 4);
+      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
    }
 
    xfer_buffers_map(idct);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 76db8c1b211..064e6c1a2d5 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -48,6 +48,17 @@ struct vertex_shader_consts
    struct vertex4f norm;
 };
 
+struct vertex_stream_0
+{
+   struct vertex2f pos;
+   struct {
+      float y;
+      float cr;
+      float cb;
+   } eb[2][2];
+   float interlaced;
+};
+
 enum VS_INPUT
 {
    VS_I_RECT,
@@ -462,16 +473,11 @@ init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE typ
    if (handler->vertex_elems_state == NULL)
       return false;
 
-   if (!vl_vb_init(&handler->pos, r->macroblocks_per_batch))
-      return false;
-
-   handler->info = MALLOC(sizeof(struct vertex_stream_0) * r->macroblocks_per_batch * 4);
-   if (handler->info == NULL)
+   if (!vl_vb_init(&handler->pos, r->macroblocks_per_batch, sizeof(struct vertex_stream_0) / sizeof(float)))
       return false;
 
    for (i = 0; i < ref_frames * mv_per_frame; ++i) {
-      handler->mv[i] = MALLOC(sizeof(struct vertex2f) * r->macroblocks_per_batch * 4);
-      if (handler->mv[i] == NULL)
+      if (!vl_vb_init(&handler->mv[i], r->macroblocks_per_batch, sizeof(struct vertex2f) / sizeof(float)))
          return false;
    }
 
@@ -496,10 +502,10 @@ cleanup_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE
    r->pipe->delete_fs_state(r->pipe, handler->fs);
    r->pipe->delete_vertex_elements_state(r->pipe, handler->vertex_elems_state);
 
-   FREE(handler->info);
+   vl_vb_cleanup(&handler->pos);
 
    for (i = 0; i < ref_frames * mv_per_frame; ++i)
-      FREE(handler->mv[i]);
+      vl_vb_cleanup(&handler->mv[i]);
 }
 
 
@@ -656,22 +662,11 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
 
    r->vertex_bufs.individual.quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
 
-   r->vertex_bufs.individual.ycbcr.stride = sizeof(struct vertex2f);
-   r->vertex_bufs.individual.ycbcr.max_index = 4 * r->macroblocks_per_batch - 1;
-   r->vertex_bufs.individual.ycbcr.buffer_offset = 0;
-   /* XXX: Create with usage DYNAMIC or STREAM */
-   r->vertex_bufs.individual.ycbcr.buffer = pipe_buffer_create
-   (
-      r->pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch
-   );
-
-   r->vertex_bufs.individual.info.stride = sizeof(struct vertex_stream_0);
-   r->vertex_bufs.individual.info.max_index = 4 * r->macroblocks_per_batch - 1;
-   r->vertex_bufs.individual.info.buffer_offset = 0;
+   r->vertex_bufs.individual.pos.stride = sizeof(struct vertex_stream_0);
+   r->vertex_bufs.individual.pos.max_index = 4 * r->macroblocks_per_batch - 1;
+   r->vertex_bufs.individual.pos.buffer_offset = 0;
    /* XXX: Create with usage DYNAMIC or STREAM */
-   r->vertex_bufs.individual.info.buffer = pipe_buffer_create
+   r->vertex_bufs.individual.pos.buffer = pipe_buffer_create
    (
       r->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
@@ -706,57 +701,57 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* y, cr, cb empty block element top left block */
-   vertex_elems[VS_I_EB_0_0].src_offset = 0;
+   vertex_elems[VS_I_EB_0_0].src_offset = sizeof(float) * 2;
    vertex_elems[VS_I_EB_0_0].instance_divisor = 0;
-   vertex_elems[VS_I_EB_0_0].vertex_buffer_index = 2;
+   vertex_elems[VS_I_EB_0_0].vertex_buffer_index = 1;
    vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
    /* y, cr, cb empty block element top right block */
-   vertex_elems[VS_I_EB_0_1].src_offset = sizeof(float) * 3;
+   vertex_elems[VS_I_EB_0_1].src_offset = sizeof(float) * 5;
    vertex_elems[VS_I_EB_0_1].instance_divisor = 0;
-   vertex_elems[VS_I_EB_0_1].vertex_buffer_index = 2;
+   vertex_elems[VS_I_EB_0_1].vertex_buffer_index = 1;
    vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
    /* y, cr, cb empty block element bottom left block */
-   vertex_elems[VS_I_EB_1_0].src_offset = sizeof(float) * 6;
+   vertex_elems[VS_I_EB_1_0].src_offset = sizeof(float) * 8;
    vertex_elems[VS_I_EB_1_0].instance_divisor = 0;
-   vertex_elems[VS_I_EB_1_0].vertex_buffer_index = 2;
+   vertex_elems[VS_I_EB_1_0].vertex_buffer_index = 1;
    vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
    /* y, cr, cb empty block element bottom right block */
-   vertex_elems[VS_I_EB_1_1].src_offset = sizeof(float) * 9;
+   vertex_elems[VS_I_EB_1_1].src_offset = sizeof(float) * 11;
    vertex_elems[VS_I_EB_1_1].instance_divisor = 0;
-   vertex_elems[VS_I_EB_1_1].vertex_buffer_index = 2;
+   vertex_elems[VS_I_EB_1_1].vertex_buffer_index = 1;
    vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
    /* progressive=0.0f interlaced=1.0f */
-   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(float) * 12;
+   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(float) * 14;
    vertex_elems[VS_I_INTERLACED].instance_divisor = 0;
-   vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 2;
+   vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 1;
    vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
 
    /* First ref surface top field texcoord element */
    vertex_elems[VS_I_MV0].src_offset = 0;
    vertex_elems[VS_I_MV0].instance_divisor = 0;
-   vertex_elems[VS_I_MV0].vertex_buffer_index = 3;
+   vertex_elems[VS_I_MV0].vertex_buffer_index = 2;
    vertex_elems[VS_I_MV0].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* First ref surface bottom field texcoord element */
    vertex_elems[VS_I_MV1].src_offset = 0;
    vertex_elems[VS_I_MV1].instance_divisor = 0;
-   vertex_elems[VS_I_MV1].vertex_buffer_index = 4;
+   vertex_elems[VS_I_MV1].vertex_buffer_index = 3;
    vertex_elems[VS_I_MV1].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Second ref surface top field texcoord element */
    vertex_elems[VS_I_MV2].src_offset = 0;
    vertex_elems[VS_I_MV2].instance_divisor = 0;
-   vertex_elems[VS_I_MV2].vertex_buffer_index = 5;
+   vertex_elems[VS_I_MV2].vertex_buffer_index = 4;
    vertex_elems[VS_I_MV2].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* Second ref surface bottom field texcoord element */
    vertex_elems[VS_I_MV3].src_offset = 0;
    vertex_elems[VS_I_MV3].instance_divisor = 0;
-   vertex_elems[VS_I_MV3].vertex_buffer_index = 6;
+   vertex_elems[VS_I_MV3].vertex_buffer_index = 5;
    vertex_elems[VS_I_MV3].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    for(i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i)
@@ -820,69 +815,56 @@ static void
 upload_vertex_stream(struct vl_mpeg12_mc_renderer *r,
                       unsigned num_macroblocks[VL_NUM_MACROBLOCK_TYPES])
 {
-   struct quadf *pos;
-   struct vertex_stream_0 *info;
+   struct vertex_stream_0 *pos;
    struct vertex2f *mv[4];
 
-   struct pipe_transfer *buf_transfer[7];
+   struct pipe_transfer *buf_transfer[5];
 
    unsigned i, j;
 
    assert(r);
    assert(num_macroblocks);
 
-   pos = (struct quadf *)pipe_buffer_map
+   pos = (struct vertex_stream_0 *)pipe_buffer_map
    (
       r->pipe,
-      r->vertex_bufs.individual.ycbcr.buffer,
+      r->vertex_bufs.individual.pos.buffer,
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &buf_transfer[0]
    );
 
-   info = (struct vertex_stream_0 *)pipe_buffer_map
-   (
-      r->pipe,
-      r->vertex_bufs.individual.info.buffer,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer[1]
-   );
-
    for (i = 0; i < 4; ++i)
       mv[i] = (struct vertex2f *)pipe_buffer_map
       (
          r->pipe,
          r->vertex_bufs.individual.mv[i].buffer,
          PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-         &buf_transfer[i + 2]
+         &buf_transfer[i + 1]
       );
 
    for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) {
       struct vl_mc_mbtype_handler *handler = &r->mbtype_handlers[i];
       unsigned count = vl_vb_upload(&handler->pos, pos);
       if (count > 0) {
+         pos += count;
+
          unsigned ref_frames, mv_per_frame;
 
          ref_frames = const_mbtype_config[i][0];
          mv_per_frame = const_mbtype_config[i][1];
 
-         pos += count;
-
-         memcpy(info, handler->info, sizeof(struct vertex_stream_0) * count * 4);
-         info += count * 4;
-
          for (j = 0; j < ref_frames * mv_per_frame; ++j)
-            memcpy(mv[j], handler->mv[j], sizeof(struct vertex2f) * count * 4);
+            vl_vb_upload(&handler->mv[j], mv[j]);
 
          for (j = 0; j < 4; ++j)
-            mv[j] += count * 4;
+            mv[j] += count;
       }
       num_macroblocks[i] = count;
    }
 
-   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.ycbcr.buffer, buf_transfer[0]);
-   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.info.buffer, buf_transfer[1]);
+   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.pos.buffer, buf_transfer[0]);
    for (i = 0; i < 4; ++i)
-      pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.mv[i].buffer, buf_transfer[i + 2]);
+      pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.mv[i].buffer, buf_transfer[i + 1]);
 }
 
 static struct pipe_sampler_view
@@ -926,7 +908,7 @@ flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE ty
 
    handler = &r->mbtype_handlers[type];
 
-   r->pipe->set_vertex_buffers(r->pipe, 3 + ref_frames * mv_per_frame, r->vertex_bufs.all);
+   r->pipe->set_vertex_buffers(r->pipe, 2 + ref_frames * mv_per_frame, r->vertex_bufs.all);
    r->pipe->bind_vertex_elements_state(r->pipe, handler->vertex_elems_state);
 
    if(ref_frames == 2) {
@@ -955,14 +937,14 @@ flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE ty
    r->pipe->bind_vs_state(r->pipe, handler->vs);
    r->pipe->bind_fs_state(r->pipe, handler->fs);
 
-   util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start, num_macroblocks * 4);
-   return num_macroblocks * 4;
+   util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start, num_macroblocks);
+   return num_macroblocks;
 }
 
 static void
 flush(struct vl_mpeg12_mc_renderer *r)
 {
-   unsigned num_macroblocks[VL_NUM_MACROBLOCK_TYPES] = { 0 };
+   unsigned num_verts[VL_NUM_MACROBLOCK_TYPES] = { 0 };
    unsigned vb_start = 0, i;
 
    assert(r);
@@ -972,15 +954,15 @@ flush(struct vl_mpeg12_mc_renderer *r)
    vl_idct_flush(&r->idct_cr);
    vl_idct_flush(&r->idct_cb);
 
-   upload_vertex_stream(r, num_macroblocks);
+   upload_vertex_stream(r, num_verts);
 
    r->pipe->bind_rasterizer_state(r->pipe, r->rs_state);
    r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
    r->pipe->set_viewport_state(r->pipe, &r->viewport);
 
    for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) {
-      if (num_macroblocks[i] > 0)
-         vb_start += flush_mbtype_handler(r, i, vb_start, num_macroblocks[i]);
+      if (num_verts[i] > 0)
+         vb_start += flush_mbtype_handler(r, i, vb_start, num_verts[i]);
    }
 
    r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
@@ -1114,9 +1096,10 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
 
    handler = &r->mbtype_handlers[type];
 
-   pos = handler->pos.num_blocks * 4;
-   vl_vb_add_block(&handler->pos, mb->mbx, mb->mby);
+   pos = handler->pos.num_verts;
 
+   info.pos.x = mb->mbx;
+   info.pos.y = mb->mby;
    for ( i = 0; i < 2; ++i) {
       for ( j = 0; j < 2; ++j) {
          info.eb[i][j].y = empty_block(r->chroma_format, mb->cbp, 0, j, i);
@@ -1125,13 +1108,11 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
       }
    }
    info.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
+   vl_vb_add_block(&handler->pos, (float*)&info);
 
    get_motion_vectors(mb, mv);
-   for ( i = 0; i < 4; ++i ) {
-      handler->info[i + pos] = info;
-      for ( j = 0; j < ref_frames * mv_per_frame; ++j )
-         handler->mv[j][i + pos] = mv[j];
-   }
+   for ( j = 0; j < ref_frames * mv_per_frame; ++j )
+      vl_vb_add_block(&handler->mv[j], (float*)&mv[j]);
 }
 
 static void
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 9195718750d..fb169c7421e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -59,24 +59,13 @@ enum VL_MACROBLOCK_TYPE
    VL_NUM_MACROBLOCK_TYPES
 };
 
-struct vertex_stream_0
-{
-   struct {
-      float y;
-      float cr;
-      float cb;
-   } eb[2][2];
-   float interlaced;
-};
-
 struct vl_mc_mbtype_handler
 {
    void *vs, *fs;
    void *vertex_elems_state;
 
    struct vl_vertex_buffer pos;
-   struct vertex_stream_0 *info;
-   struct vertex2f *mv[4];
+   struct vl_vertex_buffer mv[4];
 };
 
 struct vl_mpeg12_mc_renderer
@@ -119,9 +108,9 @@ struct vl_mpeg12_mc_renderer
 
    union
    {
-      struct pipe_vertex_buffer all[7];
+      struct pipe_vertex_buffer all[6];
       struct {
-         struct pipe_vertex_buffer quad, ycbcr, info, mv[4];
+         struct pipe_vertex_buffer quad, pos, mv[4];
       } individual;
    } vertex_bufs;
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 6df11db0aef..dba69f9a6a2 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -82,27 +82,28 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
 }
 
 bool
-vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks)
+vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks, unsigned num_elements)
 {
    assert(buffer);
 
-   buffer->num_blocks = 0;
-   buffer->blocks = MALLOC(max_blocks * sizeof(struct quadf));
-   return buffer->blocks != NULL;
+   buffer->num_verts = 0;
+   buffer->num_elements = num_elements;
+   buffer->buffer = MALLOC(max_blocks * num_elements * sizeof(float) * 4);
+   return buffer->buffer != NULL;
 }
 
 unsigned
-vl_vb_upload(struct vl_vertex_buffer *buffer, struct quadf *dst)
+vl_vb_upload(struct vl_vertex_buffer *buffer, void *dst)
 {
    unsigned todo;
 
    assert(buffer);
 
-   todo = buffer->num_blocks;
-   buffer->num_blocks = 0;
+   todo = buffer->num_verts;
+   buffer->num_verts = 0;
 
    if(todo)
-      memcpy(dst, buffer->blocks, sizeof(struct quadf) * todo);
+      memcpy(dst, buffer->buffer, sizeof(float) * buffer->num_elements * todo);
 
    return todo;
 }
@@ -112,5 +113,5 @@ vl_vb_cleanup(struct vl_vertex_buffer *buffer)
 {
    assert(buffer);
 
-   FREE(buffer->blocks);
+   FREE(buffer->buffer);
 }
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index fa4ec1dedd8..8d7c0e6a275 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -33,28 +33,31 @@
 
 struct vl_vertex_buffer
 {
-   unsigned num_blocks;
-   struct quadf *blocks;
+   unsigned num_verts;
+   unsigned num_elements;
+   float *buffer;
 };
 
 struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks);
 
-bool vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks);
+bool vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks, unsigned num_elements);
 
 static inline void
-vl_vb_add_block(struct vl_vertex_buffer *buffer, signed x, signed y)
+vl_vb_add_block(struct vl_vertex_buffer *buffer, float *elements)
 {
-   struct quadf *quad;
+   float *pos;
+   unsigned i;
 
    assert(buffer);
 
-   quad = buffer->blocks + buffer->num_blocks;
-   quad->bl.x = quad->tl.x = quad->tr.x = quad->br.x = x;
-   quad->bl.y = quad->tl.y = quad->tr.y = quad->br.y = y;
-   buffer->num_blocks++;
+   for(i = 0; i < 4; ++i) {
+      pos = buffer->buffer + buffer->num_verts * buffer->num_elements;
+      memcpy(pos, elements, sizeof(float) * buffer->num_elements);
+      buffer->num_verts++;
+   }
 }
 
-unsigned vl_vb_upload(struct vl_vertex_buffer *buffer, struct quadf *dst);
+unsigned vl_vb_upload(struct vl_vertex_buffer *buffer, void *dst);
 
 void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
 
-- 
cgit v1.2.3


From 6484898752d733a2442b433fbb78325f9021c698 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 5 Dec 2010 22:30:38 +0100
Subject: [g3dvl] move mapping/unmapping of buffers one layer up

---
 src/gallium/auxiliary/vl/vl_idct.c               | 70 ++++++++++++------------
 src/gallium/auxiliary/vl/vl_idct.h               |  4 ++
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  9 +++
 3 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 66bb140f02a..6aae28d465c 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -566,35 +566,6 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    return matrix;
 }
 
-static void
-xfer_buffers_map(struct vl_idct *idct)
-{
-   struct pipe_box rect =
-   {
-      0, 0, 0,
-      idct->textures.individual.source->width0,
-      idct->textures.individual.source->height0,
-      1
-   };
-
-   idct->tex_transfer = idct->pipe->get_transfer
-   (
-      idct->pipe, idct->textures.individual.source,
-      u_subresource(0, 0),
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &rect
-   );
-
-   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
-}
-
-static void
-xfer_buffers_unmap(struct vl_idct *idct)
-{
-   idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
-   idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
-}
-
 bool
 vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix)
 {
@@ -626,7 +597,7 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
 
    init_state(idct);
 
-   xfer_buffers_map(idct);
+   vl_idct_map_buffers(idct);
 
    return true;
 }
@@ -634,6 +605,8 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
 void
 vl_idct_cleanup(struct vl_idct *idct)
 {
+   vl_idct_unmap_buffers(idct);
+
    vl_vb_cleanup(&idct->blocks);
    cleanup_shaders(idct);
    cleanup_buffers(idct);
@@ -643,6 +616,30 @@ vl_idct_cleanup(struct vl_idct *idct)
    pipe_resource_reference(&idct->destination, NULL);
 }
 
+void
+vl_idct_map_buffers(struct vl_idct *idct)
+{
+   assert(idct);
+
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      idct->textures.individual.source->width0,
+      idct->textures.individual.source->height0,
+      1
+   };
+
+   idct->tex_transfer = idct->pipe->get_transfer
+   (
+      idct->pipe, idct->textures.individual.source,
+      u_subresource(0, 0),
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+
+   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
+}
+
 void
 vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 {
@@ -665,6 +662,15 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
    vl_vb_add_block(&idct->blocks, (float*)&v);
 }
 
+void
+vl_idct_unmap_buffers(struct vl_idct *idct)
+{
+   assert(idct);
+
+   idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
+   idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
+}
+
 void
 vl_idct_flush(struct vl_idct *idct)
 {
@@ -686,8 +692,6 @@ vl_idct_flush(struct vl_idct *idct)
 
    pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, vec_transfer);
 
-   xfer_buffers_unmap(idct);
-
    if(num_verts > 0) {
 
       idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
@@ -711,6 +715,4 @@ vl_idct_flush(struct vl_idct *idct)
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
    }
-
-   xfer_buffers_map(idct);
 }
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index ccaaf2653de..96e3d267046 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -97,8 +97,12 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_r
 
 void vl_idct_cleanup(struct vl_idct *idct);
 
+void vl_idct_map_buffers(struct vl_idct *idct);
+
 void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block);
 
+void vl_idct_unmap_buffers(struct vl_idct *idct);
+
 void vl_idct_flush(struct vl_idct *idct);
 
 #endif
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 064e6c1a2d5..f97bc47a70a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -950,6 +950,10 @@ flush(struct vl_mpeg12_mc_renderer *r)
    assert(r);
    assert(r->num_macroblocks == r->macroblocks_per_batch);
 
+   vl_idct_unmap_buffers(&r->idct_y);
+   vl_idct_unmap_buffers(&r->idct_cr);
+   vl_idct_unmap_buffers(&r->idct_cb);
+
    vl_idct_flush(&r->idct_y);
    vl_idct_flush(&r->idct_cr);
    vl_idct_flush(&r->idct_cb);
@@ -965,8 +969,13 @@ flush(struct vl_mpeg12_mc_renderer *r)
          vb_start += flush_mbtype_handler(r, i, vb_start, num_verts[i]);
    }
 
+
    r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
 
+   vl_idct_map_buffers(&r->idct_y);
+   vl_idct_map_buffers(&r->idct_cr);
+   vl_idct_map_buffers(&r->idct_cb);
+
    r->num_macroblocks = 0;
 }
 
-- 
cgit v1.2.3


From eb7452e267974f9ff971b752046c512963f59f81 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 7 Dec 2010 00:29:02 +0100
Subject: [g3dvl] rework pot buffer handling and flushing

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 142 ++++++++++-------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  10 +-
 src/gallium/drivers/softpipe/sp_video_context.c  |  18 ++-
 3 files changed, 83 insertions(+), 87 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index f97bc47a70a..9983b47692d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -519,10 +519,8 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 
    assert(r);
 
-   r->viewport.scale[0] = r->pot_buffers ?
-      util_next_power_of_two(r->picture_width) : r->picture_width;
-   r->viewport.scale[1] = r->pot_buffers ?
-      util_next_power_of_two(r->picture_height) : r->picture_height;
+   r->viewport.scale[0] = r->buffer_width;
+   r->viewport.scale[1] = r->buffer_height;
    r->viewport.scale[2] = 1;
    r->viewport.scale[3] = 1;
    r->viewport.translate[0] = 0;
@@ -530,10 +528,8 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    r->viewport.translate[2] = 0;
    r->viewport.translate[3] = 0;
 
-   r->fb_state.width = r->pot_buffers ?
-      util_next_power_of_two(r->picture_width) : r->picture_width;
-   r->fb_state.height = r->pot_buffers ?
-      util_next_power_of_two(r->picture_height) : r->picture_height;
+   r->fb_state.width = r->buffer_width;
+   r->fb_state.height = r->buffer_height;
    r->fb_state.nr_cbufs = 1;
    r->fb_state.zsbuf = NULL;
 
@@ -607,9 +603,9 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    struct pipe_sampler_view sampler_view;
 
    const unsigned mbw =
-      align(r->picture_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
+      align(r->buffer_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
    const unsigned mbh =
-      align(r->picture_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
+      align(r->buffer_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
 
    unsigned i;
 
@@ -624,10 +620,8 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
    template.format = PIPE_FORMAT_R16_SNORM;
    template.last_level = 0;
-   template.width0 = r->pot_buffers ?
-      util_next_power_of_two(r->picture_width) : r->picture_width;
-   template.height0 = r->pot_buffers ?
-      util_next_power_of_two(r->picture_height) : r->picture_height;
+   template.width0 = r->buffer_width;
+   template.height0 = r->buffer_height;
    template.depth0 = 1;
    template.usage = PIPE_USAGE_DYNAMIC;
    template.bind = PIPE_BIND_SAMPLER_VIEW;
@@ -636,17 +630,11 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
 
    if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      template.width0 = r->pot_buffers ?
-         util_next_power_of_two(r->picture_width / 2) :
-         r->picture_width / 2;
-      template.height0 = r->pot_buffers ?
-         util_next_power_of_two(r->picture_height / 2) :
-         r->picture_height / 2;
+      template.width0 = r->buffer_width / 2;
+      template.height0 = r->buffer_height / 2;
    }
    else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
-      template.height0 = r->pot_buffers ?
-         util_next_power_of_two(r->picture_height / 2) :
-         r->picture_height / 2;
+      template.height0 = r->buffer_height / 2;
 
    r->textures.individual.cb =
       r->pipe->screen->resource_create(r->pipe->screen, &template);
@@ -941,44 +929,6 @@ flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE ty
    return num_macroblocks;
 }
 
-static void
-flush(struct vl_mpeg12_mc_renderer *r)
-{
-   unsigned num_verts[VL_NUM_MACROBLOCK_TYPES] = { 0 };
-   unsigned vb_start = 0, i;
-
-   assert(r);
-   assert(r->num_macroblocks == r->macroblocks_per_batch);
-
-   vl_idct_unmap_buffers(&r->idct_y);
-   vl_idct_unmap_buffers(&r->idct_cr);
-   vl_idct_unmap_buffers(&r->idct_cb);
-
-   vl_idct_flush(&r->idct_y);
-   vl_idct_flush(&r->idct_cr);
-   vl_idct_flush(&r->idct_cb);
-
-   upload_vertex_stream(r, num_verts);
-
-   r->pipe->bind_rasterizer_state(r->pipe, r->rs_state);
-   r->pipe->set_framebuffer_state(r->pipe, &r->fb_state);
-   r->pipe->set_viewport_state(r->pipe, &r->viewport);
-
-   for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) {
-      if (num_verts[i] > 0)
-         vb_start += flush_mbtype_handler(r, i, vb_start, num_verts[i]);
-   }
-
-
-   r->pipe->flush(r->pipe, PIPE_FLUSH_RENDER_CACHE, r->fence);
-
-   vl_idct_map_buffers(&r->idct_y);
-   vl_idct_map_buffers(&r->idct_cr);
-   vl_idct_map_buffers(&r->idct_cb);
-
-   r->num_macroblocks = 0;
-}
-
 static void
 update_render_target(struct vl_mpeg12_mc_renderer *r)
 {
@@ -1190,11 +1140,10 @@ texview_map_delete(const struct keymap *map,
 bool
 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            struct pipe_context *pipe,
-                           unsigned picture_width,
-                           unsigned picture_height,
+                           unsigned buffer_width,
+                           unsigned buffer_height,
                            enum pipe_video_chroma_format chroma_format,
-                           enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
-                           bool pot_buffers)
+                           enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode)
 {
    struct pipe_resource *idct_matrix;
 
@@ -1203,17 +1152,14 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
 
    /* TODO: Implement other policies */
    assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
-   /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
-   assert(pot_buffers);
 
    memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
 
    renderer->pipe = pipe;
-   renderer->picture_width = picture_width;
-   renderer->picture_height = picture_height;
+   renderer->buffer_width = buffer_width;
+   renderer->buffer_height = buffer_height;
    renderer->chroma_format = chroma_format;
    renderer->bufmode = bufmode;
-   renderer->pot_buffers = pot_buffers;
 
    renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
                                            texview_map_delete);
@@ -1301,16 +1247,13 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
 
    if (renderer->surface) {
       if (surface != renderer->surface) {
-         if (renderer->num_macroblocks > 0) {
-            flush(renderer);
-         }
-
          new_surface = true;
-      }
+      } else {
 
-      /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
-      assert(surface != renderer->surface || renderer->past == past);
-      assert(surface != renderer->surface || renderer->future == future);
+         /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
+         assert(renderer->past == past);
+         assert(renderer->future == future);
+      }
    }
    else
       new_surface = true;
@@ -1336,7 +1279,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       num_macroblocks -= num_to_submit;
 
       if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
-         flush(renderer);
+         vl_mpeg12_mc_renderer_flush(renderer);
+
          /* Next time we get this surface it may have new ref frames */
          pipe_surface_reference(&renderer->surface, NULL);
          pipe_surface_reference(&renderer->past, NULL);
@@ -1344,3 +1288,43 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       }
    }
 }
+
+void
+vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
+{
+   unsigned num_verts[VL_NUM_MACROBLOCK_TYPES] = { 0 };
+   unsigned vb_start = 0, i;
+
+   assert(renderer);
+   assert(renderer->num_macroblocks <= renderer->macroblocks_per_batch);
+
+   if (renderer->num_macroblocks == 0)
+      return;
+
+   vl_idct_unmap_buffers(&renderer->idct_y);
+   vl_idct_unmap_buffers(&renderer->idct_cr);
+   vl_idct_unmap_buffers(&renderer->idct_cb);
+
+   vl_idct_flush(&renderer->idct_y);
+   vl_idct_flush(&renderer->idct_cr);
+   vl_idct_flush(&renderer->idct_cb);
+
+   upload_vertex_stream(renderer, num_verts);
+
+   renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
+   renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
+   renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
+
+   for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) {
+      if (num_verts[i] > 0)
+         vb_start += flush_mbtype_handler(renderer, i, vb_start, num_verts[i]);
+   }
+
+   renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, renderer->fence);
+
+   vl_idct_map_buffers(&renderer->idct_y);
+   vl_idct_map_buffers(&renderer->idct_cr);
+   vl_idct_map_buffers(&renderer->idct_cb);
+
+   renderer->num_macroblocks = 0;
+}
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index fb169c7421e..0f110c0df35 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -71,11 +71,10 @@ struct vl_mc_mbtype_handler
 struct vl_mpeg12_mc_renderer
 {
    struct pipe_context *pipe;
-   unsigned picture_width;
-   unsigned picture_height;
+   unsigned buffer_width;
+   unsigned buffer_height;
    enum pipe_video_chroma_format chroma_format;
    enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode;
-   bool pot_buffers;
    unsigned macroblocks_per_batch;
 
    struct pipe_viewport_state viewport;
@@ -126,8 +125,7 @@ bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                                 unsigned picture_width,
                                 unsigned picture_height,
                                 enum pipe_video_chroma_format chroma_format,
-                                enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
-                                bool pot_buffers);
+                                enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode);
 
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
@@ -139,4 +137,6 @@ void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *rend
                                               struct pipe_mpeg12_macroblock *mpeg12_macroblocks,
                                               struct pipe_fence_handle **fence);
 
+void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer);
+
 #endif /* vl_mpeg12_mc_renderer_h */
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 3edab823677..35636d8475c 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -282,6 +282,8 @@ sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
    assert(dst_surface);
    assert(dst_area);
 
+   vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer);
+
    vl_compositor_render(&ctx->compositor, src_surface,
                         picture_type, src_area, dst_surface, dst_area, fence);
 }
@@ -325,7 +327,10 @@ sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
    assert(vpipe);
    assert(dt);
 
-   pipe_surface_reference(&ctx->decode_target, dt);
+   if (ctx->decode_target != dt) {
+      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer);
+      pipe_surface_reference(&ctx->decode_target, dt);
+   }
 }
 
 static void
@@ -426,6 +431,7 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
                  bool pot_buffers,
                  enum pipe_format decode_format)
 {
+   unsigned buffer_width, buffer_height;
    struct sp_mpeg12_context *ctx;
 
    assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
@@ -435,6 +441,12 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
    if (!ctx)
       return NULL;
 
+   /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
+   assert(pot_buffers);
+
+   buffer_width = pot_buffers ? util_next_power_of_two(width) : width; 
+   buffer_height = pot_buffers ? util_next_power_of_two(height) : height; 
+
    ctx->base.profile = profile;
    ctx->base.chroma_format = chroma_format;
    ctx->base.width = width;
@@ -464,8 +476,8 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
    ctx->decode_format = decode_format;
 
    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
-                                   width, height, chroma_format,
-                                   bufmode, pot_buffers)) {
+                                   buffer_width, buffer_height, chroma_format,
+                                   bufmode)) {
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
-- 
cgit v1.2.3


From bfb4fb057d92869f98dc627d53d3e1b7d031d93f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 7 Dec 2010 20:13:37 +0100
Subject: [g3dvl] move vertex buffer handling into vl_vertex_buffer.c

---
 src/gallium/auxiliary/vl/vl_idct.c               | 27 ++------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 84 ++++--------------------
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     | 38 ++++++++++-
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     |  7 +-
 4 files changed, 60 insertions(+), 96 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 6aae28d465c..7de778e1036 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -376,36 +376,19 @@ init_buffers(struct vl_idct *idct)
       idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
    }
 
-   idct->vertex_bufs.individual.quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks);
+   idct->vertex_bufs.individual.quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks, &vertex_elems[VS_I_RECT]);
 
    if(idct->vertex_bufs.individual.quad.buffer == NULL)
       return false;
 
-   idct->vertex_bufs.individual.pos.stride = sizeof(struct vertex2f);
-   idct->vertex_bufs.individual.pos.max_index = 4 * idct->max_blocks - 1;
-   idct->vertex_bufs.individual.pos.buffer_offset = 0;
-   idct->vertex_bufs.individual.pos.buffer = pipe_buffer_create
-   (
-      idct->pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex2f) * 4 * idct->max_blocks
-   );
+   /* Pos element */
+   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   idct->vertex_bufs.individual.pos = vl_vb_create_buffer(idct->pipe, idct->max_blocks, &vertex_elems[VS_I_VPOS], 1, 1);
 
    if(idct->vertex_bufs.individual.pos.buffer == NULL)
       return false;
 
-   /* Rect element */
-   vertex_elems[0].src_offset = 0;
-   vertex_elems[0].instance_divisor = 0;
-   vertex_elems[0].vertex_buffer_index = 0;
-   vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Pos element */
-   vertex_elems[1].src_offset = 0;
-   vertex_elems[1].instance_divisor = 0;
-   vertex_elems[1].vertex_buffer_index = 1;
-   vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
    idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
 
    return true;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 9983b47692d..dba49e5f087 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -648,99 +648,39 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
       r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
    }
 
-   r->vertex_bufs.individual.quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
-
-   r->vertex_bufs.individual.pos.stride = sizeof(struct vertex_stream_0);
-   r->vertex_bufs.individual.pos.max_index = 4 * r->macroblocks_per_batch - 1;
-   r->vertex_bufs.individual.pos.buffer_offset = 0;
-   /* XXX: Create with usage DYNAMIC or STREAM */
-   r->vertex_bufs.individual.pos.buffer = pipe_buffer_create
-   (
-      r->pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      sizeof(struct vertex_stream_0) * 4 * r->macroblocks_per_batch
-   );
-
-   for (i = 0; i < 4; ++i) {
-      r->vertex_bufs.individual.mv[i].stride = sizeof(struct vertex2f);
-      r->vertex_bufs.individual.mv[i].max_index = 4 * r->macroblocks_per_batch - 1;
-      r->vertex_bufs.individual.mv[i].buffer_offset = 0;
-      /* XXX: Create with usage DYNAMIC or STREAM */
-      r->vertex_bufs.individual.mv[i].buffer = pipe_buffer_create
-      (
-         r->pipe->screen,
-         PIPE_BIND_VERTEX_BUFFER,
-         sizeof(struct vertex2f) * 4 * r->macroblocks_per_batch
-      );
-   }
-
    memset(&vertex_elems, 0, sizeof(vertex_elems));
 
-   /* Rectangle element */
-   vertex_elems[VS_I_RECT].src_offset = 0;
-   vertex_elems[VS_I_RECT].instance_divisor = 0;
-   vertex_elems[VS_I_RECT].vertex_buffer_index = 0;
-   vertex_elems[VS_I_RECT].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   r->vertex_bufs.individual.quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch, &vertex_elems[VS_I_RECT]);
 
    /* Position element */
-   vertex_elems[VS_I_VPOS].src_offset = 0;
-   vertex_elems[VS_I_VPOS].instance_divisor = 0;
-   vertex_elems[VS_I_VPOS].vertex_buffer_index = 1;
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* y, cr, cb empty block element top left block */
-   vertex_elems[VS_I_EB_0_0].src_offset = sizeof(float) * 2;
-   vertex_elems[VS_I_EB_0_0].instance_divisor = 0;
-   vertex_elems[VS_I_EB_0_0].vertex_buffer_index = 1;
    vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
    /* y, cr, cb empty block element top right block */
-   vertex_elems[VS_I_EB_0_1].src_offset = sizeof(float) * 5;
-   vertex_elems[VS_I_EB_0_1].instance_divisor = 0;
-   vertex_elems[VS_I_EB_0_1].vertex_buffer_index = 1;
    vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
    /* y, cr, cb empty block element bottom left block */
-   vertex_elems[VS_I_EB_1_0].src_offset = sizeof(float) * 8;
-   vertex_elems[VS_I_EB_1_0].instance_divisor = 0;
-   vertex_elems[VS_I_EB_1_0].vertex_buffer_index = 1;
    vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
    /* y, cr, cb empty block element bottom right block */
-   vertex_elems[VS_I_EB_1_1].src_offset = sizeof(float) * 11;
-   vertex_elems[VS_I_EB_1_1].instance_divisor = 0;
-   vertex_elems[VS_I_EB_1_1].vertex_buffer_index = 1;
    vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
 
    /* progressive=0.0f interlaced=1.0f */
-   vertex_elems[VS_I_INTERLACED].src_offset = sizeof(float) * 14;
-   vertex_elems[VS_I_INTERLACED].instance_divisor = 0;
-   vertex_elems[VS_I_INTERLACED].vertex_buffer_index = 1;
    vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
 
-   /* First ref surface top field texcoord element */
-   vertex_elems[VS_I_MV0].src_offset = 0;
-   vertex_elems[VS_I_MV0].instance_divisor = 0;
-   vertex_elems[VS_I_MV0].vertex_buffer_index = 2;
-   vertex_elems[VS_I_MV0].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* First ref surface bottom field texcoord element */
-   vertex_elems[VS_I_MV1].src_offset = 0;
-   vertex_elems[VS_I_MV1].instance_divisor = 0;
-   vertex_elems[VS_I_MV1].vertex_buffer_index = 3;
-   vertex_elems[VS_I_MV1].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface top field texcoord element */
-   vertex_elems[VS_I_MV2].src_offset = 0;
-   vertex_elems[VS_I_MV2].instance_divisor = 0;
-   vertex_elems[VS_I_MV2].vertex_buffer_index = 4;
-   vertex_elems[VS_I_MV2].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   /* Second ref surface bottom field texcoord element */
-   vertex_elems[VS_I_MV3].src_offset = 0;
-   vertex_elems[VS_I_MV3].instance_divisor = 0;
-   vertex_elems[VS_I_MV3].vertex_buffer_index = 5;
-   vertex_elems[VS_I_MV3].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   r->vertex_bufs.individual.pos = vl_vb_create_buffer(
+      r->pipe, r->macroblocks_per_batch,
+      &vertex_elems[VS_I_VPOS], 6, 1);
+
+   for (i = 0; i < 4; ++i) {
+      /* motion vector 0..4 element */
+      vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R32G32_FLOAT;
+      r->vertex_bufs.individual.mv[i] = vl_vb_create_buffer(
+         r->pipe, r->macroblocks_per_batch,
+         &vertex_elems[VS_I_MV0 + i], 1, i + 2);
+   }
 
    for(i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i)
       init_mbtype_handler(r, i, vertex_elems);
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index dba69f9a6a2..4e0d704a23d 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -30,6 +30,7 @@
 #include <pipe/p_screen.h>
 #include <util/u_memory.h>
 #include <util/u_inlines.h>
+#include <util/u_format.h>
 #include "vl_vertex_buffers.h"
 #include "vl_types.h"
 
@@ -39,7 +40,7 @@ static const struct quadf const_quad = {
 };
 
 struct pipe_vertex_buffer
-vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
+vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks, struct pipe_vertex_element* element)
 {
    struct pipe_vertex_buffer quad;
    struct pipe_transfer *buf_transfer;
@@ -49,6 +50,13 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
 
    assert(pipe);
    assert(max_blocks);
+   assert(element);
+
+   /* setup rectangle element */
+   element->src_offset = 0;
+   element->instance_divisor = 0;
+   element->vertex_buffer_index = 0;
+   element->src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* create buffer */
    quad.stride = sizeof(struct vertex2f);
@@ -81,6 +89,34 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
    return quad;
 }
 
+struct pipe_vertex_buffer
+vl_vb_create_buffer(struct pipe_context *pipe, unsigned max_blocks,
+                    struct pipe_vertex_element *elements, unsigned num_elements,
+                    unsigned vertex_buffer_index)
+{
+   struct pipe_vertex_buffer buf;
+   unsigned i, size = 0;
+
+   for ( i = 0; i < num_elements; ++i ) {
+      elements[i].src_offset = size;
+      elements[i].instance_divisor = 0;
+      elements[i].vertex_buffer_index = vertex_buffer_index;
+      size += util_format_get_blocksize(elements[i].src_format);
+   }
+
+   buf.stride = size;
+   buf.max_index = 4 * max_blocks - 1;
+   buf.buffer_offset = 0;
+   buf.buffer = pipe_buffer_create
+   (
+      pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      size * 4 * max_blocks
+   );
+
+   return buf;
+}
+
 bool
 vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks, unsigned num_elements)
 {
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 8d7c0e6a275..cf71f029a79 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -38,7 +38,12 @@ struct vl_vertex_buffer
    float *buffer;
 };
 
-struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks);
+struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks,
+                                             struct pipe_vertex_element* element);
+
+struct pipe_vertex_buffer vl_vb_create_buffer(struct pipe_context *pipe, unsigned max_blocks,
+                                              struct pipe_vertex_element* elements, unsigned num_elements,
+                                              unsigned vertex_buffer_index);
 
 bool vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks, unsigned num_elements);
 
-- 
cgit v1.2.3


From 22b4acb2069a368e986805d3b43395172ebf9146 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 7 Dec 2010 21:23:14 +0100
Subject: [g3dvl] use buffer width instead of texture size or vs constants

---
 src/gallium/auxiliary/vl/vl_idct.c               |  7 ++-
 src/gallium/auxiliary/vl/vl_idct.h               |  7 +++
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 69 ++++++------------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  1 -
 4 files changed, 28 insertions(+), 56 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 7de778e1036..f19cfc38d7b 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -110,8 +110,8 @@ create_vert_shader(struct vl_idct *idct)
     *
     */
    scale = ureg_imm2f(shader,
-      (float)BLOCK_WIDTH / idct->destination->width0, 
-      (float)BLOCK_HEIGHT / idct->destination->height0);
+      (float)BLOCK_WIDTH / idct->buffer_width,
+      (float)BLOCK_HEIGHT / idct->buffer_height);
 
    ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
    ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
@@ -555,6 +555,9 @@ vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resour
    assert(idct && pipe && dst);
 
    idct->pipe = pipe;
+   idct->buffer_width = dst->width0;
+   idct->buffer_height = dst->height0;
+
    pipe_resource_reference(&idct->textures.individual.matrix, matrix);
    pipe_resource_reference(&idct->textures.individual.transpose, matrix);
    pipe_resource_reference(&idct->destination, dst);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 96e3d267046..2e98f22bea2 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -31,10 +31,17 @@
 #include <pipe/p_state.h>
 #include "vl_vertex_buffers.h"
 
+struct vl_idct_buffer
+{
+};
+
 struct vl_idct
 {
    struct pipe_context *pipe;
 
+   unsigned buffer_width;
+   unsigned buffer_height;
+
    unsigned max_blocks;
 
    struct pipe_viewport_state viewport[2];
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index dba49e5f087..5ee9836f293 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -43,11 +43,6 @@
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
 
-struct vertex_shader_consts
-{
-   struct vertex4f norm;
-};
-
 struct vertex_stream_0
 {
    struct vertex2f pos;
@@ -108,9 +103,9 @@ static void *
 create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
 {
    struct ureg_program *shader;
-   struct ureg_src norm, mbs;
+   struct ureg_src scale;
    struct ureg_src vrect, vpos, eb[2][2], interlaced, vmv[4];
-   struct ureg_dst scale, t_vpos, t_vtex;
+   struct ureg_dst t_vpos, t_vtex;
    struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2][2], o_interlaced, o_vmv[4];
    unsigned i, j, count, label;
 
@@ -118,10 +113,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    if (!shader)
       return NULL;
 
-   norm = ureg_DECL_constant(shader, 0);
-   mbs = ureg_imm2f(shader, MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT);
-
-   scale = ureg_DECL_temporary(shader);
    t_vpos = ureg_DECL_temporary(shader);
    t_vtex = ureg_DECL_temporary(shader);
 
@@ -156,7 +147,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    }
 
    /*
-    * scale = norm * mbs;
+    * scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
     *
     * t_vpos = (vpos + vrect) * scale
     * o_vpos.xy = t_vpos
@@ -181,15 +172,17 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
     * o_interlaced = interlaced
     *
     * if(count > 0) { // Apply motion vectors
-    *    scale = norm * 0.5;
+    *    scale = 0.5 / (dst.width, dst.height);
     *    o_vmv[0..count] = t_vpos + vmv[0..count] * scale
     * }
     *
     */
-   ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, mbs);
+   scale = ureg_imm2f(shader,
+      (float)MACROBLOCK_WIDTH / r->buffer_width,
+      (float)MACROBLOCK_HEIGHT / r->buffer_height);
 
    ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
-   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), ureg_src(scale));
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
@@ -201,9 +194,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
       ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
       ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
       ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
-      ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), ureg_src(scale));
+      ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), scale);
       ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
-      ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), ureg_src(scale));
+      ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), scale);
 
    ureg_ELSE(shader, &label);
 
@@ -221,14 +214,16 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_MOV(shader, o_interlaced, interlaced);
 
    if(count > 0) {
-      ureg_MUL(shader, ureg_writemask(scale, TGSI_WRITEMASK_XY), norm, ureg_imm1f(shader, 0.5f));
+      scale = ureg_imm2f(shader,
+         0.5f / r->buffer_width,
+         0.5f / r->buffer_height);
+
       for (i = 0; i < count; ++i)
-         ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), ureg_src(scale), vmv[i], ureg_src(t_vpos));
+         ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), scale, vmv[i], ureg_src(t_vpos));
    }
 
    ureg_release_temporary(shader, t_vtex);
    ureg_release_temporary(shader, t_vpos);
-   ureg_release_temporary(shader, scale);
 
    ureg_END(shader);
 
@@ -685,13 +680,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    for(i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i)
       init_mbtype_handler(r, i, vertex_elems);
 
-   r->vs_const_buf = pipe_buffer_create
-   (
-      r->pipe->screen,
-      PIPE_BIND_CONSTANT_BUFFER,
-      sizeof(struct vertex_shader_consts)
-   );
-
    return true;
 }
 
@@ -702,8 +690,6 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
 
    assert(r);
 
-   pipe_resource_reference(&r->vs_const_buf, NULL);
-
    for (i = 0; i < 3; ++i) {
       pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
       pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
@@ -869,29 +855,6 @@ flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE ty
    return num_macroblocks;
 }
 
-static void
-update_render_target(struct vl_mpeg12_mc_renderer *r)
-{
-   struct pipe_transfer *buf_transfer;
-   struct vertex_shader_consts *vs_consts;
-
-   vs_consts = pipe_buffer_map
-   (
-      r->pipe, r->vs_const_buf,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer
-   );
-
-   vs_consts->norm.x = 1.0f / r->surface->width;
-   vs_consts->norm.y = 1.0f / r->surface->height;
-
-   pipe_buffer_unmap(r->pipe, r->vs_const_buf, buf_transfer);
-
-   r->fb_state.cbufs[0] = r->surface;
-
-   r->pipe->set_constant_buffer(r->pipe, PIPE_SHADER_VERTEX, 0, r->vs_const_buf);
-}
-
 static void
 get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4])
 {
@@ -1203,7 +1166,6 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       pipe_surface_reference(&renderer->past, past);
       pipe_surface_reference(&renderer->future, future);
       renderer->fence = fence;
-      update_render_target(renderer);
    }
 
    while (num_macroblocks) {
@@ -1251,6 +1213,7 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
 
    upload_vertex_stream(renderer, num_verts);
 
+   renderer->fb_state.cbufs[0] = renderer->surface;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 0f110c0df35..6ce9cbe390c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -78,7 +78,6 @@ struct vl_mpeg12_mc_renderer
    unsigned macroblocks_per_batch;
 
    struct pipe_viewport_state viewport;
-   struct pipe_resource *vs_const_buf;
    struct pipe_framebuffer_state fb_state;
 
    struct vl_idct idct_y, idct_cb, idct_cr;
-- 
cgit v1.2.3


From 0b749d6dcb537472771d6fe6e454aafc916ab3fe Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 02:12:24 +0100
Subject: [g3dvl] split idct code into state and buffers

---
 src/gallium/auxiliary/vl/vl_idct.c               | 390 +++++++++++++----------
 src/gallium/auxiliary/vl/vl_idct.h               |  45 +--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 113 ++++---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   3 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     |  57 ++--
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     |  12 +-
 6 files changed, 343 insertions(+), 277 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index f19cfc38d7b..168e6e88b2b 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -197,9 +197,6 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2
 static void *
 create_transpose_frag_shader(struct vl_idct *idct)
 {
-   struct pipe_resource *transpose = idct->textures.individual.transpose;
-   struct pipe_resource *intermediate = idct->textures.individual.intermediate;
-
    struct ureg_program *shader;
 
    struct ureg_src block, tex, sampler[2];
@@ -221,8 +218,8 @@ create_transpose_frag_shader(struct vl_idct *idct)
    start[0] = ureg_imm1f(shader, 0.0f);
    start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
-   fetch_four(shader, l, block, sampler[0], start[0], block, false, false, transpose->width0);
-   fetch_four(shader, r, tex, sampler[1], start[1], block, true, false, intermediate->height0);
+   fetch_four(shader, l, block, sampler[0], start[0], block, false, false, BLOCK_WIDTH / 4);
+   fetch_four(shader, r, tex, sampler[1], start[1], block, true, false, idct->buffer_height / 4);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -244,9 +241,6 @@ create_transpose_frag_shader(struct vl_idct *idct)
 static void *
 create_matrix_frag_shader(struct vl_idct *idct)
 {
-   struct pipe_resource *matrix = idct->textures.individual.matrix;
-   struct pipe_resource *source = idct->textures.individual.source;
-
    struct ureg_program *shader;
 
    struct ureg_src tex, block, sampler[2];
@@ -278,21 +272,21 @@ create_matrix_frag_shader(struct vl_idct *idct)
 
    ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
    for (i = 0; i < 4; ++i) {
-      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], block, false, false, source->width0);
+      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], block, false, false, idct->buffer_width / 4);
       ureg_MUL(shader, l[i][0], ureg_src(l[i][0]), ureg_imm1f(shader, STAGE1_SCALE));
       ureg_MUL(shader, l[i][1], ureg_src(l[i][1]), ureg_imm1f(shader, STAGE1_SCALE));
       if(i != 3)
          ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 
-            ureg_src(t_tc), ureg_imm1f(shader, 1.0f / source->height0));
+            ureg_src(t_tc), ureg_imm1f(shader, 1.0f / idct->buffer_height));
    }
    
    for (i = 0; i < NR_RENDER_TARGETS; ++i) {
 
 #if NR_RENDER_TARGETS == 8
       ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i));
-      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], block, true, true, matrix->width0);
+      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
 #elif NR_RENDER_TARGETS == 1
-      fetch_four(shader, r, block, sampler[1], start[1], block, true, true, matrix->width0);
+      fetch_four(shader, r, block, sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
 #else
 #error invalid number of render targets
 #endif
@@ -339,13 +333,81 @@ cleanup_shaders(struct vl_idct *idct)
 }
 
 static bool
-init_buffers(struct vl_idct *idct)
+init_state(struct vl_idct *idct)
+{
+   struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
+   struct pipe_sampler_state sampler;
+   struct pipe_rasterizer_state rs_state;
+   unsigned i;
+
+   assert(idct);
+
+   idct->quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks);
+
+   if(idct->quad.buffer == NULL)
+      return false;
+
+   for (i = 0; i < 4; ++i) {
+      memset(&sampler, 0, sizeof(sampler));
+      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+      sampler.compare_func = PIPE_FUNC_ALWAYS;
+      sampler.normalized_coords = 1;
+      /*sampler.shadow_ambient = ; */
+      /*sampler.lod_bias = ; */
+      sampler.min_lod = 0;
+      /*sampler.max_lod = ; */
+      /*sampler.border_color[0] = ; */
+      /*sampler.max_anisotropy = ; */
+      idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
+   }
+
+   memset(&rs_state, 0, sizeof(rs_state));
+   /*rs_state.sprite_coord_enable */
+   rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
+   rs_state.point_quad_rasterization = true;
+   rs_state.point_size = BLOCK_WIDTH;
+   rs_state.gl_rasterization_rules = false;
+   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
+
+   vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
+
+   /* Pos element */
+   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   idct->vertex_buffer_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1);
+   idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
+
+   return true;
+}
+
+/* Undo init_state: delete its state objects and drop the quad vertex buffer it uploaded. */
+static void
+cleanup_state(struct vl_idct *idct)
+{
+   unsigned i;
+   for (i = 0; i < 4; ++i)
+      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
+   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
+   idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+   pipe_resource_reference(&idct->quad.buffer, NULL); /* reference obtained from vl_vb_upload_quads */
+}
+
+static bool
+init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
    struct pipe_resource template;
    struct pipe_sampler_view sampler_view;
-   struct pipe_vertex_element vertex_elems[2];
    unsigned i;
 
+   assert(idct && buffer);
+
+   /* create textures */
    memset(&template, 0, sizeof(struct pipe_resource));
    template.last_level = 0;
    template.depth0 = 1;
@@ -354,149 +416,74 @@ init_buffers(struct vl_idct *idct)
 
    template.target = PIPE_TEXTURE_2D;
    template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
-   template.width0 = idct->destination->width0 / 4;
-   template.height0 = idct->destination->height0;
+   template.width0 = idct->buffer_width / 4;
+   template.height0 = idct->buffer_height;
    template.depth0 = 1;
    template.usage = PIPE_USAGE_STREAM;
-   idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+   buffer->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
    template.target = PIPE_TEXTURE_3D;
    template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
-   template.width0 = idct->destination->width0 / NR_RENDER_TARGETS;
-   template.height0 = idct->destination->height0 / 4;
+   template.width0 = idct->buffer_width / NR_RENDER_TARGETS;
+   template.height0 = idct->buffer_height / 4;
    template.depth0 = NR_RENDER_TARGETS;
    template.usage = PIPE_USAGE_STATIC;
-   idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+   buffer->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
    for (i = 0; i < 4; ++i) {
-      if(idct->textures.all[i] == NULL)
+      if(buffer->textures.all[i] == NULL)
          return false; /* a texture failed to allocate */
 
-      u_sampler_view_default_template(&sampler_view, idct->textures.all[i], idct->textures.all[i]->format);
-      idct->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, idct->textures.all[i], &sampler_view);
+      u_sampler_view_default_template(&sampler_view, buffer->textures.all[i], buffer->textures.all[i]->format);
+      buffer->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, buffer->textures.all[i], &sampler_view);
    }
 
-   idct->vertex_bufs.individual.quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks, &vertex_elems[VS_I_RECT]);
-
-   if(idct->vertex_bufs.individual.quad.buffer == NULL)
-      return false;
-
-   /* Pos element */
-   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
-
-   idct->vertex_bufs.individual.pos = vl_vb_create_buffer(idct->pipe, idct->max_blocks, &vertex_elems[VS_I_VPOS], 1, 1);
-
-   if(idct->vertex_bufs.individual.pos.buffer == NULL)
-      return false;
-
-   idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
-
    return true;
 }
 
 static void
-cleanup_buffers(struct vl_idct *idct)
+cleanup_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
    unsigned i;
 
-   assert(idct);
+   assert(idct && buffer);
 
    for (i = 0; i < 4; ++i) {
-      pipe_sampler_view_reference(&idct->sampler_views.all[i], NULL);
-      pipe_resource_reference(&idct->textures.all[i], NULL);
+      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
+      pipe_resource_reference(&buffer->textures.all[i], NULL);
    }
-
-   idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-   pipe_resource_reference(&idct->vertex_bufs.individual.quad.buffer, NULL);
-   pipe_resource_reference(&idct->vertex_bufs.individual.pos.buffer, NULL);
 }
 
-static void
-init_state(struct vl_idct *idct)
+static bool
+init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   struct pipe_sampler_state sampler;
-   struct pipe_rasterizer_state rs_state;
-   unsigned i;
-
-   idct->viewport[0].scale[0] = idct->textures.individual.intermediate->width0;
-   idct->viewport[0].scale[1] = idct->textures.individual.intermediate->height0;
+   assert(idct && buffer);
 
-   idct->viewport[1].scale[0] = idct->destination->width0;
-   idct->viewport[1].scale[1] = idct->destination->height0;
+   buffer->vertex_bufs.individual.quad.stride = idct->quad.stride;
+   buffer->vertex_bufs.individual.quad.max_index = idct->quad.max_index;
+   buffer->vertex_bufs.individual.quad.buffer_offset = idct->quad.buffer_offset;
+   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, idct->quad.buffer);
 
-   idct->fb_state[0].width = idct->textures.individual.intermediate->width0;
-   idct->fb_state[0].height = idct->textures.individual.intermediate->height0;
+   buffer->vertex_bufs.individual.pos = vl_vb_create_buffer(idct->pipe, idct->max_blocks, idct->vertex_buffer_stride);
 
-   idct->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
-   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
-      idct->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface(
-         idct->pipe->screen, idct->textures.individual.intermediate, 0, 0, i,
-         PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
-   }
-
-   idct->fb_state[1].width = idct->destination->width0;
-   idct->fb_state[1].height = idct->destination->height0;
-
-   idct->fb_state[1].nr_cbufs = 1;
-   idct->fb_state[1].cbufs[0] = idct->pipe->screen->get_tex_surface(
-      idct->pipe->screen, idct->destination, 0, 0, 0,
-      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
-
-   for(i = 0; i < 2; ++i) {
-      idct->viewport[i].scale[2] = 1;
-      idct->viewport[i].scale[3] = 1;
-      idct->viewport[i].translate[0] = 0;
-      idct->viewport[i].translate[1] = 0;
-      idct->viewport[i].translate[2] = 0;
-      idct->viewport[i].translate[3] = 0;
-
-      idct->fb_state[i].zsbuf = NULL;
-   }
+   if(buffer->vertex_bufs.individual.pos.buffer == NULL)
+      return false;
 
-   for (i = 0; i < 4; ++i) {
-      memset(&sampler, 0, sizeof(sampler));
-      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
-      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
-      sampler.compare_func = PIPE_FUNC_ALWAYS;
-      sampler.normalized_coords = 1;
-      /*sampler.shadow_ambient = ; */
-      /*sampler.lod_bias = ; */
-      sampler.min_lod = 0;
-      /*sampler.max_lod = ; */
-      /*sampler.border_color[0] = ; */
-      /*sampler.max_anisotropy = ; */
-      idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
-   }
+   if (!vl_vb_init(&buffer->blocks, idct->max_blocks, 2))
+      return false;
 
-   memset(&rs_state, 0, sizeof(rs_state));
-   /*rs_state.sprite_coord_enable */
-   rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
-   rs_state.point_quad_rasterization = true;
-   rs_state.point_size = BLOCK_WIDTH;
-   rs_state.gl_rasterization_rules = false;
-   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
+   return true;
 }
 
 static void
-cleanup_state(struct vl_idct *idct)
+cleanup_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   unsigned i;
+   assert(idct && buffer);
 
-   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
-      idct->pipe->screen->tex_surface_destroy(idct->fb_state[0].cbufs[i]);
-   }
+   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
+   pipe_resource_reference(&buffer->vertex_bufs.individual.pos.buffer, NULL);
 
-   idct->pipe->screen->tex_surface_destroy(idct->fb_state[1].cbufs[0]);
-
-   for (i = 0; i < 4; ++i)
-      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
-
-   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
+   vl_vb_cleanup(&buffer->blocks);
 }
 
 struct pipe_resource *
@@ -549,85 +536,142 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    return matrix;
 }
 
-bool
-vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix)
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, 
+                  unsigned buffer_width, unsigned buffer_height,
+                  struct pipe_resource *matrix)
 {
-   assert(idct && pipe && dst);
+   assert(idct && pipe && matrix);
 
    idct->pipe = pipe;
-   idct->buffer_width = dst->width0;
-   idct->buffer_height = dst->height0;
-
-   pipe_resource_reference(&idct->textures.individual.matrix, matrix);
-   pipe_resource_reference(&idct->textures.individual.transpose, matrix);
-   pipe_resource_reference(&idct->destination, dst);
+   idct->buffer_width = buffer_width;
+   idct->buffer_height = buffer_height;
+   pipe_resource_reference(&idct->matrix, matrix);
 
    idct->max_blocks =
-      align(idct->destination->width0, BLOCK_WIDTH) / BLOCK_WIDTH *
-      align(idct->destination->height0, BLOCK_HEIGHT) / BLOCK_HEIGHT *
-      idct->destination->depth0;
+      align(buffer_width, BLOCK_WIDTH) / BLOCK_WIDTH *
+      align(buffer_height, BLOCK_HEIGHT) / BLOCK_HEIGHT;
 
-   if(!init_buffers(idct))
+   if(!init_shaders(idct))
       return false;
 
-   if(!init_shaders(idct)) {
-      cleanup_buffers(idct);
+   if(!init_state(idct)) {
+      cleanup_shaders(idct);
       return false;
    }
 
-   if(!vl_vb_init(&idct->blocks, idct->max_blocks, 2)) {
-      cleanup_shaders(idct);
-      cleanup_buffers(idct);
+   return true;
+}
+
+void
+vl_idct_cleanup(struct vl_idct *idct)
+{
+   cleanup_shaders(idct);
+   cleanup_state(idct);
+
+   pipe_resource_reference(&idct->matrix, NULL);
+}
+
+bool
+vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst)
+{
+   unsigned i;
+
+   assert(buffer);
+   assert(idct);
+   assert(dst);
+
+   pipe_resource_reference(&buffer->textures.individual.matrix, idct->matrix);
+   pipe_resource_reference(&buffer->textures.individual.transpose, idct->matrix);
+   pipe_resource_reference(&buffer->destination, dst);
+
+   if (!init_textures(idct, buffer))
       return false;
+
+   if (!init_vertex_buffers(idct, buffer))
+      return false;
+
+   /* init state */
+   buffer->viewport[0].scale[0] = buffer->textures.individual.intermediate->width0;
+   buffer->viewport[0].scale[1] = buffer->textures.individual.intermediate->height0;
+
+   buffer->viewport[1].scale[0] = buffer->destination->width0;
+   buffer->viewport[1].scale[1] = buffer->destination->height0;
+
+   buffer->fb_state[0].width = buffer->textures.individual.intermediate->width0;
+   buffer->fb_state[0].height = buffer->textures.individual.intermediate->height0;
+
+   buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
+   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
+      buffer->fb_state[0].cbufs[i] = idct->pipe->screen->get_tex_surface(
+         idct->pipe->screen, buffer->textures.individual.intermediate, 0, 0, i,
+         PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
    }
 
-   init_state(idct);
+   buffer->fb_state[1].width = buffer->destination->width0;
+   buffer->fb_state[1].height = buffer->destination->height0;
 
-   vl_idct_map_buffers(idct);
+   buffer->fb_state[1].nr_cbufs = 1;
+   buffer->fb_state[1].cbufs[0] = idct->pipe->screen->get_tex_surface(
+      idct->pipe->screen, buffer->destination, 0, 0, 0,
+      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
+
+   for(i = 0; i < 2; ++i) {
+      buffer->viewport[i].scale[2] = 1;
+      buffer->viewport[i].scale[3] = 1;
+      buffer->viewport[i].translate[0] = 0;
+      buffer->viewport[i].translate[1] = 0;
+      buffer->viewport[i].translate[2] = 0;
+      buffer->viewport[i].translate[3] = 0;
+
+      buffer->fb_state[i].zsbuf = NULL;
+   }
 
    return true;
 }
 
 void
-vl_idct_cleanup(struct vl_idct *idct)
+vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   vl_idct_unmap_buffers(idct);
+   unsigned i;
 
-   vl_vb_cleanup(&idct->blocks);
-   cleanup_shaders(idct);
-   cleanup_buffers(idct);
+   assert(buffer);
 
-   cleanup_state(idct);
+   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
+      idct->pipe->screen->tex_surface_destroy(buffer->fb_state[0].cbufs[i]);
+   }
+
+   idct->pipe->screen->tex_surface_destroy(buffer->fb_state[1].cbufs[0]);
 
-   pipe_resource_reference(&idct->destination, NULL);
+   cleanup_textures(idct, buffer);
+   cleanup_vertex_buffers(idct, buffer);
 }
 
 void
-vl_idct_map_buffers(struct vl_idct *idct)
+vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
    assert(idct);
 
    struct pipe_box rect =
    {
       0, 0, 0,
-      idct->textures.individual.source->width0,
-      idct->textures.individual.source->height0,
+      buffer->textures.individual.source->width0,
+      buffer->textures.individual.source->height0,
       1
    };
 
-   idct->tex_transfer = idct->pipe->get_transfer
+   buffer->tex_transfer = idct->pipe->get_transfer
    (
-      idct->pipe, idct->textures.individual.source,
+      idct->pipe, buffer->textures.individual.source,
       u_subresource(0, 0),
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
    );
 
-   idct->texels = idct->pipe->transfer_map(idct->pipe, idct->tex_transfer);
+   buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer);
 }
 
 void
-vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
+vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block)
 {
    struct vertex2f v;
    unsigned tex_pitch;
@@ -635,30 +679,30 @@ vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 
    unsigned i;
 
-   assert(idct);
+   assert(buffer);
 
-   tex_pitch = idct->tex_transfer->stride / sizeof(short);
-   texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
+   tex_pitch = buffer->tex_transfer->stride / sizeof(short);
+   texels = buffer->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
 
    for (i = 0; i < BLOCK_HEIGHT; ++i)
       memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
 
    v.x = x;
    v.y = y;
-   vl_vb_add_block(&idct->blocks, (float*)&v);
+   vl_vb_add_block(&buffer->blocks, (float*)&v);
 }
 
 void
-vl_idct_unmap_buffers(struct vl_idct *idct)
+vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   assert(idct);
+   assert(idct && buffer);
 
-   idct->pipe->transfer_unmap(idct->pipe, idct->tex_transfer);
-   idct->pipe->transfer_destroy(idct->pipe, idct->tex_transfer);
+   idct->pipe->transfer_unmap(idct->pipe, buffer->tex_transfer);
+   idct->pipe->transfer_destroy(idct->pipe, buffer->tex_transfer);
 }
 
 void
-vl_idct_flush(struct vl_idct *idct)
+vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
    struct pipe_transfer *vec_transfer;
    void *vectors;
@@ -669,34 +713,34 @@ vl_idct_flush(struct vl_idct *idct)
    vectors = pipe_buffer_map
    (
       idct->pipe,
-      idct->vertex_bufs.individual.pos.buffer,
+      buffer->vertex_bufs.individual.pos.buffer,
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &vec_transfer
    );
 
-   num_verts = vl_vb_upload(&idct->blocks, vectors);
+   num_verts = vl_vb_upload(&buffer->blocks, vectors);
 
-   pipe_buffer_unmap(idct->pipe, idct->vertex_bufs.individual.pos.buffer, vec_transfer);
+   pipe_buffer_unmap(idct->pipe, buffer->vertex_bufs.individual.pos.buffer, vec_transfer);
 
    if(num_verts > 0) {
 
       idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
-      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
+      idct->pipe->set_vertex_buffers(idct->pipe, 2, buffer->vertex_bufs.all);
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
       idct->pipe->bind_vs_state(idct->pipe, idct->vs);
 
       /* first stage */
-      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[0]);
-      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[0]);
-      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[0]);
+      idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]);
+      idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]);
+      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
 
       /* second stage */
-      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state[1]);
-      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport[1]);
-      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, idct->sampler_views.stage[1]);
+      idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]);
+      idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]);
+      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 2e98f22bea2..6076bdec46c 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -31,10 +31,6 @@
 #include <pipe/p_state.h>
 #include "vl_vertex_buffers.h"
 
-struct vl_idct_buffer
-{
-};
-
 struct vl_idct
 {
    struct pipe_context *pipe;
@@ -44,13 +40,7 @@ struct vl_idct
 
    unsigned max_blocks;
 
-   struct pipe_viewport_state viewport[2];
-   struct pipe_framebuffer_state fb_state[2];
-
-   struct pipe_resource *destination;
-
    void *rs_state;
-
    void *vertex_elems_state;
 
    union
@@ -63,6 +53,22 @@ struct vl_idct
       } individual;
    } samplers;
 
+   void *vs;
+   void *matrix_fs, *transpose_fs;
+
+   struct pipe_resource *matrix;
+   struct pipe_vertex_buffer quad;
+
+   unsigned vertex_buffer_stride;
+};
+
+struct vl_idct_buffer
+{
+   struct pipe_viewport_state viewport[2];
+   struct pipe_framebuffer_state fb_state[2];
+
+   struct pipe_resource *destination;
+
    union
    {
       struct pipe_sampler_view *all[4];
@@ -73,9 +79,6 @@ struct vl_idct
       } individual;
    } sampler_views;
 
-   void *vs;
-   void *matrix_fs, *transpose_fs;
-
    union
    {
       struct pipe_resource *all[4];
@@ -100,16 +103,22 @@ struct vl_idct
 
 struct pipe_resource *vl_idct_upload_matrix(struct pipe_context *pipe);
 
-bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst, struct pipe_resource *matrix);
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, 
+                  unsigned buffer_width, unsigned buffer_height, 
+                  struct pipe_resource *matrix);
 
 void vl_idct_cleanup(struct vl_idct *idct);
 
-void vl_idct_map_buffers(struct vl_idct *idct);
+bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst);
+
+void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
+
+void vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
-void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block);
+void vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block);
 
-void vl_idct_unmap_buffers(struct vl_idct *idct);
+void vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
-void vl_idct_flush(struct vl_idct *idct);
+void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
 #endif
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 5ee9836f293..28de8c9a81a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -593,6 +593,7 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
 static bool
 init_buffers(struct vl_mpeg12_mc_renderer *r)
 {
+   struct pipe_resource *idct_matrix;
    struct pipe_resource template;
    struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
    struct pipe_sampler_view sampler_view;
@@ -602,7 +603,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    const unsigned mbh =
       align(r->buffer_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
 
-   unsigned i;
+   unsigned i, stride;
 
    assert(r);
 
@@ -624,6 +625,17 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
 
    r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
 
+   if (!(idct_matrix = vl_idct_upload_matrix(r->pipe)))
+      return false;
+
+   if (!vl_idct_init(&r->idct_luma, r->pipe, r->buffer_width, r->buffer_height, idct_matrix))
+      return false;
+
+   if (!vl_idct_init_buffer(&r->idct_luma, &r->idct_y, r->textures.individual.y))
+      return false;
+
+   vl_idct_map_buffers(&r->idct_luma, &r->idct_y);
+
    if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
       template.width0 = r->buffer_width / 2;
       template.height0 = r->buffer_height / 2;
@@ -636,6 +648,19 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    r->textures.individual.cr =
       r->pipe->screen->resource_create(r->pipe->screen, &template);
 
+   if(!vl_idct_init(&r->idct_chroma, r->pipe, template.width0, template.height0, idct_matrix))
+      return false;
+
+   if (!vl_idct_init_buffer(&r->idct_chroma, &r->idct_cb, r->textures.individual.cb))
+      return false;
+
+   vl_idct_map_buffers(&r->idct_chroma, &r->idct_cb);
+
+   if (!vl_idct_init_buffer(&r->idct_chroma, &r->idct_cr, r->textures.individual.cr))
+      return false;
+
+   vl_idct_map_buffers(&r->idct_chroma, &r->idct_cr);
+
    for (i = 0; i < 3; ++i) {
       u_sampler_view_default_template(&sampler_view,
                                       r->textures.all[i],
@@ -645,7 +670,8 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
 
    memset(&vertex_elems, 0, sizeof(vertex_elems));
 
-   r->vertex_bufs.individual.quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch, &vertex_elems[VS_I_RECT]);
+   vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
+   r->vertex_bufs.individual.quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
 
    /* Position element */
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
@@ -665,16 +691,17 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    /* progressive=0.0f interlaced=1.0f */
    vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
 
+   stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 6, 1);
+
    r->vertex_bufs.individual.pos = vl_vb_create_buffer(
-      r->pipe, r->macroblocks_per_batch,
-      &vertex_elems[VS_I_VPOS], 6, 1);
+      r->pipe, r->macroblocks_per_batch, stride);
 
    for (i = 0; i < 4; ++i) {
       /* motion vector 0..4 element */
       vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R32G32_FLOAT;
+      stride = vl_vb_element_helper(&vertex_elems[VS_I_MV0 + i], 1, i + 2);
       r->vertex_bufs.individual.mv[i] = vl_vb_create_buffer(
-         r->pipe, r->macroblocks_per_batch,
-         &vertex_elems[VS_I_MV0 + i], 1, i + 2);
+         r->pipe, r->macroblocks_per_batch, stride);
    }
 
    for(i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i)
@@ -698,6 +725,17 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
 
    for(i = 0; i<VL_NUM_MACROBLOCK_TYPES; ++i)
       cleanup_mbtype_handler(r, i);
+
+   vl_idct_unmap_buffers(&r->idct_luma, &r->idct_y);
+   vl_idct_unmap_buffers(&r->idct_chroma, &r->idct_cb);
+   vl_idct_unmap_buffers(&r->idct_chroma, &r->idct_cr);
+
+   vl_idct_cleanup_buffer(&r->idct_luma, &r->idct_y);
+   vl_idct_cleanup_buffer(&r->idct_chroma, &r->idct_cb);
+   vl_idct_cleanup_buffer(&r->idct_chroma, &r->idct_cr);
+
+   vl_idct_cleanup(&r->idct_luma);
+   vl_idct_cleanup(&r->idct_chroma);
 }
 
 static enum VL_MACROBLOCK_TYPE
@@ -1048,8 +1086,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            enum pipe_video_chroma_format chroma_format,
                            enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode)
 {
-   struct pipe_resource *idct_matrix;
-
    assert(renderer);
    assert(pipe);
 
@@ -1080,30 +1116,8 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->future = NULL;
    renderer->num_macroblocks = 0;
 
-   if(!(idct_matrix = vl_idct_upload_matrix(pipe)))
-      goto error_idct_matrix;
-
-   if(!vl_idct_init(&renderer->idct_y, pipe, renderer->textures.individual.y, idct_matrix))
-      goto error_idct_y;
-
-   if(!vl_idct_init(&renderer->idct_cr, pipe, renderer->textures.individual.cr, idct_matrix))
-      goto error_idct_cr;
-
-   if(!vl_idct_init(&renderer->idct_cb, pipe, renderer->textures.individual.cb, idct_matrix))
-      goto error_idct_cb;
-
    return true;
 
-error_idct_cb:
-   vl_idct_cleanup(&renderer->idct_cr);
-
-error_idct_cr:
-   vl_idct_cleanup(&renderer->idct_y);
-
-error_idct_y:
-error_idct_matrix:
-   cleanup_buffers(renderer);
-
 error_buffers:
    cleanup_pipe_state(renderer);
 
@@ -1117,10 +1131,6 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 {
    assert(renderer);
 
-   vl_idct_cleanup(&renderer->idct_y);
-   vl_idct_cleanup(&renderer->idct_cr);
-   vl_idct_cleanup(&renderer->idct_cb);
-
    util_delete_keymap(renderer->texview_map, renderer->pipe);
    cleanup_pipe_state(renderer);
    cleanup_buffers(renderer);
@@ -1141,27 +1151,12 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
                                          *mpeg12_macroblocks,
                                          struct pipe_fence_handle **fence)
 {
-   bool new_surface = false;
-
    assert(renderer);
    assert(surface);
    assert(num_macroblocks);
    assert(mpeg12_macroblocks);
 
-   if (renderer->surface) {
-      if (surface != renderer->surface) {
-         new_surface = true;
-      } else {
-
-         /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
-         assert(renderer->past == past);
-         assert(renderer->future == future);
-      }
-   }
-   else
-      new_surface = true;
-
-   if (new_surface) {
+   if (surface != renderer->surface) {
       pipe_surface_reference(&renderer->surface, surface);
       pipe_surface_reference(&renderer->past, past);
       pipe_surface_reference(&renderer->future, future);
@@ -1203,13 +1198,13 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
    if (renderer->num_macroblocks == 0)
       return;
 
-   vl_idct_unmap_buffers(&renderer->idct_y);
-   vl_idct_unmap_buffers(&renderer->idct_cr);
-   vl_idct_unmap_buffers(&renderer->idct_cb);
+   vl_idct_unmap_buffers(&renderer->idct_luma, &renderer->idct_y);
+   vl_idct_unmap_buffers(&renderer->idct_chroma, &renderer->idct_cr);
+   vl_idct_unmap_buffers(&renderer->idct_chroma, &renderer->idct_cb);
 
-   vl_idct_flush(&renderer->idct_y);
-   vl_idct_flush(&renderer->idct_cr);
-   vl_idct_flush(&renderer->idct_cb);
+   vl_idct_flush(&renderer->idct_luma, &renderer->idct_y);
+   vl_idct_flush(&renderer->idct_chroma, &renderer->idct_cr);
+   vl_idct_flush(&renderer->idct_chroma, &renderer->idct_cb);
 
    upload_vertex_stream(renderer, num_verts);
 
@@ -1225,9 +1220,9 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
 
    renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, renderer->fence);
 
-   vl_idct_map_buffers(&renderer->idct_y);
-   vl_idct_map_buffers(&renderer->idct_cr);
-   vl_idct_map_buffers(&renderer->idct_cb);
+   vl_idct_map_buffers(&renderer->idct_luma, &renderer->idct_y);
+   vl_idct_map_buffers(&renderer->idct_chroma, &renderer->idct_cr);
+   vl_idct_map_buffers(&renderer->idct_chroma, &renderer->idct_cb);
 
    renderer->num_macroblocks = 0;
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 6ce9cbe390c..f1c7a2a4012 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -80,7 +80,8 @@ struct vl_mpeg12_mc_renderer
    struct pipe_viewport_state viewport;
    struct pipe_framebuffer_state fb_state;
 
-   struct vl_idct idct_y, idct_cb, idct_cr;
+   struct vl_idct idct_luma, idct_chroma;
+   struct vl_idct_buffer idct_y, idct_cb, idct_cr;
 
    void *rs_state;
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 4e0d704a23d..4647685053b 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -40,7 +40,7 @@ static const struct quadf const_quad = {
 };
 
 struct pipe_vertex_buffer
-vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks, struct pipe_vertex_element* element)
+vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
 {
    struct pipe_vertex_buffer quad;
    struct pipe_transfer *buf_transfer;
@@ -50,13 +50,6 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks, struct pipe_v
 
    assert(pipe);
    assert(max_blocks);
-   assert(element);
-
-   /* setup rectangle element */
-   element->src_offset = 0;
-   element->instance_divisor = 0;
-   element->vertex_buffer_index = 0;
-   element->src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    /* create buffer */
    quad.stride = sizeof(struct vertex2f);
@@ -89,34 +82,56 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks, struct pipe_v
    return quad;
 }
 
+struct pipe_vertex_element
+vl_vb_get_quad_vertex_element()
+{
+   struct pipe_vertex_element element;
+
+   /* setup rectangle element */
+   element.src_offset = 0;
+   element.instance_divisor = 0;
+   element.vertex_buffer_index = 0;
+   element.src_format = PIPE_FORMAT_R32G32_FLOAT;
+
+   return element;
+}
+
 struct pipe_vertex_buffer
-vl_vb_create_buffer(struct pipe_context *pipe, unsigned max_blocks,
-                    struct pipe_vertex_element *elements, unsigned num_elements,
-                    unsigned vertex_buffer_index)
+vl_vb_create_buffer(struct pipe_context *pipe, unsigned max_blocks, unsigned stride)
 {
    struct pipe_vertex_buffer buf;
-   unsigned i, size = 0;
-
-   for ( i = 0; i < num_elements; ++i ) {
-      elements[i].src_offset = size;
-      elements[i].instance_divisor = 0;
-      elements[i].vertex_buffer_index = vertex_buffer_index;
-      size += util_format_get_blocksize(elements[i].src_format);
-   }
 
-   buf.stride = size;
+   buf.stride = stride;
    buf.max_index = 4 * max_blocks - 1;
    buf.buffer_offset = 0;
    buf.buffer = pipe_buffer_create
    (
       pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      size * 4 * max_blocks
+      stride * 4 * max_blocks
    );
 
    return buf;
 }
 
+unsigned
+vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
+                              unsigned vertex_buffer_index)
+{
+   unsigned i, offset = 0;
+
+   assert(elements && num_elements);
+
+   for ( i = 0; i < num_elements; ++i ) {
+      elements[i].src_offset = offset;
+      elements[i].instance_divisor = 0;
+      elements[i].vertex_buffer_index = vertex_buffer_index;
+      offset += util_format_get_blocksize(elements[i].src_format);
+   }
+
+   return offset;
+}
+
 bool
 vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks, unsigned num_elements)
 {
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index cf71f029a79..dae80fa767b 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -38,12 +38,14 @@ struct vl_vertex_buffer
    float *buffer;
 };
 
-struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks,
-                                             struct pipe_vertex_element* element);
+struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks);
 
-struct pipe_vertex_buffer vl_vb_create_buffer(struct pipe_context *pipe, unsigned max_blocks,
-                                              struct pipe_vertex_element* elements, unsigned num_elements,
-                                              unsigned vertex_buffer_index);
+struct pipe_vertex_element vl_vb_get_quad_vertex_element();
+
+struct pipe_vertex_buffer vl_vb_create_buffer(struct pipe_context *pipe, unsigned max_blocks, unsigned stride);
+
+unsigned vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
+                              unsigned vertex_buffer_index);
 
 bool vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks, unsigned num_elements);
 
-- 
cgit v1.2.3


From d8d8939dd5d79600807b3c64310a45d7e18c408e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 14:50:28 +0100
Subject: [g3dvl] split empty block handling

Empty block handling is split between vertex shader (x-axis)
and fragment shader (y-axis).
---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 84 ++++++++++--------------
 1 file changed, 34 insertions(+), 50 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 28de8c9a81a..a701e976d50 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -78,11 +78,8 @@ enum VS_OUTPUT
    VS_O_TEX0,
    VS_O_TEX1,
    VS_O_TEX2,
-   VS_O_EB_0_0,
-   VS_O_EB_0_1,
-   VS_O_EB_1_0,
-   VS_O_EB_1_1,
-   VS_O_INTERLACED,
+   VS_O_EB_0,
+   VS_O_EB_1,
    VS_O_MV0,
    VS_O_MV1,
    VS_O_MV2,
@@ -106,7 +103,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    struct ureg_src scale;
    struct ureg_src vrect, vpos, eb[2][2], interlaced, vmv[4];
    struct ureg_dst t_vpos, t_vtex;
-   struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2][2], o_interlaced, o_vmv[4];
+   struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4];
    unsigned i, j, count, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -129,11 +126,8 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
    o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
    o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);   
-   o_eb[0][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_0);
-   o_eb[0][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_1);
-   o_eb[1][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_0);
-   o_eb[1][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_1);
-   o_interlaced = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED);
+   o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0);
+   o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1);
    
    count=0;
    for (i = 0; i < ref_frames; ++i) {
@@ -153,7 +147,8 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
     * o_vpos.xy = t_vpos
     * o_vpos.zw = vpos
     *
-    * o_line = vpos * 8
+    * o_line.xy = vrect * 8
+    * o_line.z = interlaced
     *
     * if(interlaced) {
     *    t_vtex.x = vrect.x
@@ -168,8 +163,8 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
     *    o_vtex[0..1].xy = t_vpos
     * }
     * o_vtex[2].xy = t_vpos
-    * o_eb[0..1][0..1] = eb[0..1][0..1]
-    * o_interlaced = interlaced
+    *
+    * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
     *
     * if(count > 0) { // Apply motion vectors
     *    scale = 0.5 / (dst.width, dst.height);
@@ -188,6 +183,8 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 
    ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_XY), vrect, 
       ureg_imm2f(shader, MACROBLOCK_WIDTH / 2, MACROBLOCK_HEIGHT / 2));
+   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Z), 
+      ureg_scalar(interlaced, TGSI_SWIZZLE_X));
 
    ureg_IF(shader, interlaced, &label);
 
@@ -206,12 +203,12 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    ureg_ENDIF(shader);
    ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
-   ureg_MOV(shader, o_eb[0][0], eb[0][0]);
-   ureg_MOV(shader, o_eb[0][1], eb[0][1]);
-   ureg_MOV(shader, o_eb[1][0], eb[1][0]);
-   ureg_MOV(shader, o_eb[1][1], eb[1][1]);
-
-   ureg_MOV(shader, o_interlaced, interlaced);
+   ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
+            eb[0][1], eb[0][0]);
+   ureg_CMP(shader, ureg_writemask(o_eb[1], TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
+            eb[1][1], eb[1][0]);
 
    if(count > 0) {
       scale = ureg_imm2f(shader,
@@ -240,15 +237,21 @@ calc_field(struct ureg_program *shader)
    line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
 
    /*
-    * line going from 0 to 8 in steps of 0.5
+    * line.xy going from 0 to 8 in steps of 0.5
+    * line.z flag that controls interlacing
     *
     * tmp.z = fraction(line.y)
     * tmp.z = tmp.z >= 0.5 ? 1 : 0
     * tmp.xy = line >= 4 ? 1 : 0
+    * tmp.w = line.z ? tmp.z : tmp.y
     */
    ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(line, TGSI_SWIZZLE_Y));
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line, ureg_imm2f(shader, BLOCK_WIDTH / 2, BLOCK_HEIGHT / 2));
+   ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W),
+            ureg_negate(ureg_scalar(line, TGSI_SWIZZLE_Z)),
+            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z),
+            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
 
    return tmp;
 }
@@ -256,9 +259,9 @@ calc_field(struct ureg_program *shader)
 static struct ureg_dst
 fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src tc[3], sampler[3], eb[2][2], interlaced;
+   struct ureg_src tc[3], sampler[3], eb[2];
    struct ureg_dst texel, t_tc, t_eb_info, tmp;
-   unsigned i, label, l_x, l_y;
+   unsigned i, label;
 
    texel = ureg_DECL_temporary(shader);
    t_tc = ureg_DECL_temporary(shader);
@@ -269,12 +272,8 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
    tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
 
-   eb[0][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_0, TGSI_INTERPOLATE_CONSTANT);
-   eb[0][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0_1, TGSI_INTERPOLATE_CONSTANT);
-   eb[1][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_0, TGSI_INTERPOLATE_CONSTANT);
-   eb[1][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1_1, TGSI_INTERPOLATE_CONSTANT);
-
-   interlaced = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INTERLACED, TGSI_INTERPOLATE_CONSTANT);
+   eb[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0, TGSI_INTERPOLATE_CONSTANT);
+   eb[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1, TGSI_INTERPOLATE_CONSTANT);
 
    for (i = 0; i < 3; ++i)  {
       sampler[i] = ureg_DECL_sampler(shader, i);
@@ -286,31 +285,16 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
     * texel.cr = tex(tc[2], sampler[2])
     */
 
-   ureg_IF(shader, interlaced, &label);
-      ureg_MOV(shader, ureg_writemask(field, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z));
-   ureg_ENDIF(shader);
-
    ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_W)),
             tc[1], tc[0]);
 
-   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y), &l_y);
+   ureg_CMP(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_W)),
+            eb[1], eb[0]);
 
-      ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X), &l_x);
-         ureg_MOV(shader, t_eb_info, eb[1][1]);
-      ureg_ELSE(shader, &l_x);
-         ureg_MOV(shader, t_eb_info, eb[1][0]);
-      ureg_ENDIF(shader);
-
-   ureg_ELSE(shader, &l_y);
-
-      ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X), &l_x);
-         ureg_MOV(shader, t_eb_info, eb[0][1]);
-      ureg_ELSE(shader, &l_x);
-         ureg_MOV(shader, t_eb_info, eb[0][0]);
-      ureg_ENDIF(shader);
-
-   ureg_ENDIF(shader);
+   /* r600g is ignoring TGSI_INTERPOLATE_CONSTANT, just workaround this */
+   ureg_SGE(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), ureg_src(t_eb_info), ureg_imm1f(shader, 0.5f));
 
    for (i = 0; i < 3; ++i) {
       ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), TGSI_SWIZZLE_X + i), &label);
-- 
cgit v1.2.3


From b680476b522b538f4de0b81785fa1f113fa0c507 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 16:11:29 +0100
Subject: [g3dvl] always use all motion vectors

Reprogramming the vertex buffers takes more time than pumping
all motion vectors through the vertex shader.
---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 62 ++++++++++--------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  3 +-
 2 files changed, 28 insertions(+), 37 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index a701e976d50..260d49a3a3b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -97,14 +97,14 @@ static const unsigned const_mbtype_config[VL_NUM_MACROBLOCK_TYPES][2] = {
 };
 
 static void *
-create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
+create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
    struct ureg_src scale;
    struct ureg_src vrect, vpos, eb[2][2], interlaced, vmv[4];
    struct ureg_dst t_vpos, t_vtex;
    struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4];
-   unsigned i, j, count, label;
+   unsigned i, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
@@ -129,15 +129,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0);
    o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1);
    
-   count=0;
-   for (i = 0; i < ref_frames; ++i) {
-      for (j = 0; j < 2; ++j) {        
-        if(j < mv_per_frame) {
-           vmv[count] = ureg_DECL_vs_input(shader, VS_I_MV0 + count);
-           o_vmv[count] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + count);
-           count++;
-        }
-      }
+   for (i = 0; i < 4; ++i) {
+     vmv[i] = ureg_DECL_vs_input(shader, VS_I_MV0 + i);
+     o_vmv[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i);
    }
 
    /*
@@ -166,10 +160,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
     *
     * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
     *
-    * if(count > 0) { // Apply motion vectors
-    *    scale = 0.5 / (dst.width, dst.height);
-    *    o_vmv[0..count] = t_vpos + vmv[0..count] * scale
-    * }
+    * // Apply motion vectors
+    * scale = 0.5 / (dst.width, dst.height);
+    * o_vmv[0..count] = t_vpos + vmv[0..count] * scale
     *
     */
    scale = ureg_imm2f(shader,
@@ -210,14 +203,12 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
             ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
             eb[1][1], eb[1][0]);
 
-   if(count > 0) {
-      scale = ureg_imm2f(shader,
-         0.5f / r->buffer_width,
-         0.5f / r->buffer_height);
+   scale = ureg_imm2f(shader,
+      0.5f / r->buffer_width,
+      0.5f / r->buffer_height);
 
-      for (i = 0; i < count; ++i)
-         ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), scale, vmv[i], ureg_src(t_vpos));
-   }
+   for (i = 0; i < 4; ++i)
+      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), scale, vmv[i], ureg_src(t_vpos));
 
    ureg_release_temporary(shader, t_vtex);
    ureg_release_temporary(shader, t_vpos);
@@ -426,8 +417,7 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
 }
 
 static bool
-init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type,
-                    struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS])
+init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type)
 {
    unsigned ref_frames, mv_per_frame;
    struct vl_mc_mbtype_handler *handler;
@@ -440,18 +430,12 @@ init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE typ
 
    handler = &r->mbtype_handlers[type];
 
-   handler->vs = create_vert_shader(r, ref_frames, mv_per_frame);
+   handler->vs = create_vert_shader(r);
    handler->fs = create_frag_shader(r, ref_frames, mv_per_frame);
 
    if (handler->vs == NULL || handler->fs == NULL)
       return false;
 
-   handler->vertex_elems_state = r->pipe->create_vertex_elements_state(
-      r->pipe, 7 + ref_frames * mv_per_frame, vertex_elems);
-
-   if (handler->vertex_elems_state == NULL)
-      return false;
-
    if (!vl_vb_init(&handler->pos, r->macroblocks_per_batch, sizeof(struct vertex_stream_0) / sizeof(float)))
       return false;
 
@@ -479,7 +463,6 @@ cleanup_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE
 
    r->pipe->delete_vs_state(r->pipe, handler->vs);
    r->pipe->delete_fs_state(r->pipe, handler->fs);
-   r->pipe->delete_vertex_elements_state(r->pipe, handler->vertex_elems_state);
 
    vl_vb_cleanup(&handler->pos);
 
@@ -688,8 +671,14 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
          r->pipe, r->macroblocks_per_batch, stride);
    }
 
+   r->vertex_elems_state = r->pipe->create_vertex_elements_state(
+      r->pipe, 11, vertex_elems);
+
+   if (r->vertex_elems_state == NULL)
+      return false;
+
    for(i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i)
-      init_mbtype_handler(r, i, vertex_elems);
+      init_mbtype_handler(r, i);
 
    return true;
 }
@@ -720,6 +709,8 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
 
    vl_idct_cleanup(&r->idct_luma);
    vl_idct_cleanup(&r->idct_chroma);
+
+   r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state);
 }
 
 static enum VL_MACROBLOCK_TYPE
@@ -844,9 +835,6 @@ flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE ty
 
    handler = &r->mbtype_handlers[type];
 
-   r->pipe->set_vertex_buffers(r->pipe, 2 + ref_frames * mv_per_frame, r->vertex_bufs.all);
-   r->pipe->bind_vertex_elements_state(r->pipe, handler->vertex_elems_state);
-
    if(ref_frames == 2) {
 
       r->textures.individual.ref[0] = r->past->texture;
@@ -1196,6 +1184,8 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
+   renderer->pipe->set_vertex_buffers(renderer->pipe, 6, renderer->vertex_bufs.all);
+   renderer->pipe->bind_vertex_elements_state(renderer->pipe, renderer->vertex_elems_state);
 
    for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) {
       if (num_verts[i] > 0)
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index f1c7a2a4012..3e37c11009f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -62,7 +62,6 @@ enum VL_MACROBLOCK_TYPE
 struct vl_mc_mbtype_handler
 {
    void *vs, *fs;
-   void *vertex_elems_state;
 
    struct vl_vertex_buffer pos;
    struct vl_vertex_buffer mv[4];
@@ -83,6 +82,8 @@ struct vl_mpeg12_mc_renderer
    struct vl_idct idct_luma, idct_chroma;
    struct vl_idct_buffer idct_y, idct_cb, idct_cr;
 
+   void *vertex_elems_state;
+
    void *rs_state;
 
    union
-- 
cgit v1.2.3


From 081b01fd604f4ab7a33804b7deabe3304631fa59 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 18:26:58 +0100
Subject: [g3dvl] start moving macroblock type handling into shaders

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 92 +++++++++++++++++-------
 1 file changed, 67 insertions(+), 25 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 260d49a3a3b..77cb3e06287 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -52,6 +52,9 @@ struct vertex_stream_0
       float cb;
    } eb[2][2];
    float interlaced;
+   float frame_pred;
+   float ref_frames;
+   float bkwd_pred;
 };
 
 enum VS_INPUT
@@ -63,6 +66,9 @@ enum VS_INPUT
    VS_I_EB_1_0,
    VS_I_EB_1_1,
    VS_I_INTERLACED,
+   VS_I_FRAME_PRED,
+   VS_I_REF_FRAMES,
+   VS_I_BKWD_PRED,
    VS_I_MV0,
    VS_I_MV1,
    VS_I_MV2,
@@ -80,6 +86,9 @@ enum VS_OUTPUT
    VS_O_TEX2,
    VS_O_EB_0,
    VS_O_EB_1,
+   VS_O_FRAME_PRED,
+   VS_O_REF_FRAMES,
+   VS_O_BKWD_PRED,
    VS_O_MV0,
    VS_O_MV1,
    VS_O_MV2,
@@ -101,9 +110,11 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
    struct ureg_src scale;
-   struct ureg_src vrect, vpos, eb[2][2], interlaced, vmv[4];
-   struct ureg_dst t_vpos, t_vtex;
+   struct ureg_src vrect, vpos, eb[2][2], vmv[4];
+   struct ureg_src interlaced, frame_pred, ref_frames, bkwd_pred;
+   struct ureg_dst t_vpos, t_vtex, t_vmv;
    struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4];
+   struct ureg_dst o_frame_pred, o_ref_frames, o_bkwd_pred;
    unsigned i, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -112,6 +123,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 
    t_vpos = ureg_DECL_temporary(shader);
    t_vtex = ureg_DECL_temporary(shader);
+   t_vmv = ureg_DECL_temporary(shader);
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
@@ -120,6 +132,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
    eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
    interlaced = ureg_DECL_vs_input(shader, VS_I_INTERLACED);
+   frame_pred = ureg_DECL_vs_input(shader, VS_I_FRAME_PRED);
+   ref_frames = ureg_DECL_vs_input(shader, VS_I_REF_FRAMES);
+   bkwd_pred = ureg_DECL_vs_input(shader, VS_I_BKWD_PRED);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
    o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);   
@@ -128,6 +143,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);   
    o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0);
    o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1);
+   o_frame_pred = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FRAME_PRED);
+   o_ref_frames = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES);
+   o_bkwd_pred = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED);
    
    for (i = 0; i < 4; ++i) {
      vmv[i] = ureg_DECL_vs_input(shader, VS_I_MV0 + i);
@@ -160,6 +178,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
     *
     * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
     *
+    * o_frame_pred = frame_pred
+    * o_ref_frames = ref_frames
+    * o_bkwd_pred = bkwd_pred
+    *
     * // Apply motion vectors
     * scale = 0.5 / (dst.width, dst.height);
     * o_vmv[0..count] = t_vpos + vmv[0..count] * scale
@@ -203,15 +225,20 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
             ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
             eb[1][1], eb[1][0]);
 
+   ureg_MOV(shader, ureg_writemask(o_frame_pred, TGSI_WRITEMASK_X), frame_pred);
+   ureg_MOV(shader, ureg_writemask(o_ref_frames, TGSI_WRITEMASK_X), ref_frames);
+   ureg_MOV(shader, ureg_writemask(o_bkwd_pred, TGSI_WRITEMASK_X), bkwd_pred);
+
    scale = ureg_imm2f(shader,
       0.5f / r->buffer_width,
       0.5f / r->buffer_height);
 
-   for (i = 0; i < 4; ++i)
+   for (i = 0; i < 4; i++)
       ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), scale, vmv[i], ureg_src(t_vpos));
 
    ureg_release_temporary(shader, t_vtex);
    ureg_release_temporary(shader, t_vpos);
+   ureg_release_temporary(shader, t_vmv);
 
    ureg_END(shader);
 
@@ -312,29 +339,36 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 }
 
 static struct ureg_dst
-fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frames, unsigned mv_per_frame)
+fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frames)
 {
-   struct ureg_src tc[ref_frames * mv_per_frame], sampler[ref_frames];
+   struct ureg_src frame_pred; //, ref_frames, bkwd_pred;
+   struct ureg_src tc[4], sampler[ref_frames];
    struct ureg_dst ref[ref_frames], t_tc, result;
-   unsigned i;
+   unsigned i, label;
 
-   for (i = 0; i < ref_frames * mv_per_frame; ++i)
+   frame_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FRAME_PRED, TGSI_INTERPOLATE_CONSTANT);
+   //ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT);
+   //bkwd_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED, TGSI_INTERPOLATE_CONSTANT);
+
+   for (i = 0; i < 4; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
 
-   for (i = 0; i < ref_frames; ++i) {
+   for (i = 0; i < 2; ++i) {
       sampler[i] = ureg_DECL_sampler(shader, i + 3);
       ref[i] = ureg_DECL_temporary(shader);
    }
 
    result = ureg_DECL_temporary(shader);
 
-   if (ref_frames == 1) {
-      if(mv_per_frame == 1)
+   if (ref_frames == 0)
+      ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
+   else if (ref_frames == 1) {
+      ureg_IF(shader, frame_pred, &label);
          /*
           * result = tex(tc[0], sampler[0])
           */
          ureg_TEX(shader, result, TGSI_TEXTURE_2D, tc[0], sampler[0]);
-      else {
+      ureg_ELSE(shader, &label);
          t_tc = ureg_DECL_temporary(shader);
          /*
           * result = tex(field.y ? tc[1] : tc[0], sampler[0])
@@ -345,16 +379,16 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frame
          ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
 
          ureg_release_temporary(shader, t_tc);
-      }
+      ureg_ENDIF(shader);
 
    } else if (ref_frames == 2) {
-      if(mv_per_frame == 1) {
+      ureg_IF(shader, frame_pred, &label);
          /*
           * ref[0..1] = tex(tc[0..1], sampler[0..1])
           */
          ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
          ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
-      } else {
+      ureg_ELSE(shader, &label);
          t_tc = ureg_DECL_temporary(shader);
 
          /*
@@ -374,7 +408,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frame
          ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[1]);
 
          ureg_release_temporary(shader, t_tc);
-      }
+      ureg_ENDIF(shader);
 
       ureg_LRP(shader, result, ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
    }
@@ -386,10 +420,10 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frame
 }
 
 static void *
-create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigned mv_per_frame)
+create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames)
 {
    struct ureg_program *shader;
-   struct ureg_src result;
+   struct ureg_dst result;
    struct ureg_dst field, texel;
    struct ureg_dst fragment;
 
@@ -402,15 +436,13 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames, unsigne
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
 
-   if (ref_frames == 0)
-      result = ureg_imm1f(shader, 0.5f);
-   else
-      result = ureg_src(fetch_ref(shader, field, ref_frames, mv_per_frame));
+   result = fetch_ref(shader, field, ref_frames);
 
-   ureg_ADD(shader, fragment, ureg_src(texel), result);
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(result));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
+   ureg_release_temporary(shader, result);
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, r->pipe);
@@ -431,7 +463,7 @@ init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE typ
    handler = &r->mbtype_handlers[type];
 
    handler->vs = create_vert_shader(r);
-   handler->fs = create_frag_shader(r, ref_frames, mv_per_frame);
+   handler->fs = create_frag_shader(r, ref_frames);
 
    if (handler->vs == NULL || handler->fs == NULL)
       return false;
@@ -658,7 +690,16 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    /* progressive=0.0f interlaced=1.0f */
    vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
 
-   stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 6, 1);
+   /* frame=0.0f field=1.0f */
+   vertex_elems[VS_I_FRAME_PRED].src_format = PIPE_FORMAT_R32_FLOAT;
+
+   /* intra=0.0f forward/backward=1.0f bi=-1.0f */
+   vertex_elems[VS_I_REF_FRAMES].src_format = PIPE_FORMAT_R32_FLOAT;
+
+   /* forward=0.0f backward=1.0f */
+   vertex_elems[VS_I_BKWD_PRED].src_format = PIPE_FORMAT_R32_FLOAT;
+
+   stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 9, 1);
 
    r->vertex_bufs.individual.pos = vl_vb_create_buffer(
       r->pipe, r->macroblocks_per_batch, stride);
@@ -672,7 +713,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    }
 
    r->vertex_elems_state = r->pipe->create_vertex_elements_state(
-      r->pipe, 11, vertex_elems);
+      r->pipe, NUM_VS_INPUTS, vertex_elems);
 
    if (r->vertex_elems_state == NULL)
       return false;
@@ -980,6 +1021,7 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
       }
    }
    info.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
+   info.frame_pred = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? 1.0f : 0.0f;
    vl_vb_add_block(&handler->pos, (float*)&info);
 
    get_motion_vectors(mb, mv);
-- 
cgit v1.2.3


From 5790ca5289596332a3f4d736f554dc8118ee3883 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 18:42:48 +0100
Subject: [g3dvl] moving macroblock type handling into shaders part 2

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 59 ++++++++++++------------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 77cb3e06287..d6104972ba7 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -341,14 +341,14 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 static struct ureg_dst
 fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frames)
 {
-   struct ureg_src frame_pred; //, ref_frames, bkwd_pred;
+   struct ureg_src frame_pred, bkwd_pred; //, ref_frames, ;
    struct ureg_src tc[4], sampler[ref_frames];
    struct ureg_dst ref[ref_frames], t_tc, result;
    unsigned i, label;
 
    frame_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FRAME_PRED, TGSI_INTERPOLATE_CONSTANT);
    //ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT);
-   //bkwd_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED, TGSI_INTERPOLATE_CONSTANT);
+   bkwd_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED, TGSI_INTERPOLATE_CONSTANT);
 
    for (i = 0; i < 4; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
@@ -363,24 +363,33 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frame
    if (ref_frames == 0)
       ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
    else if (ref_frames == 1) {
+      t_tc = ureg_DECL_temporary(shader);
       ureg_IF(shader, frame_pred, &label);
+
          /*
           * result = tex(tc[0], sampler[0])
           */
-         ureg_TEX(shader, result, TGSI_TEXTURE_2D, tc[0], sampler[0]);
+         ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[0]);
+
       ureg_ELSE(shader, &label);
-         t_tc = ureg_DECL_temporary(shader);
+
          /*
           * result = tex(field.y ? tc[1] : tc[0], sampler[0])
           */
          ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
             ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
             tc[1], tc[0]);
-         ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
 
-         ureg_release_temporary(shader, t_tc);
       ureg_ENDIF(shader);
 
+      ureg_IF(shader, bkwd_pred, &label);
+         ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[1]);
+      ureg_ELSE(shader, &label);
+         ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
+      ureg_ENDIF(shader);
+
+      ureg_release_temporary(shader, t_tc);
+
    } else if (ref_frames == 2) {
       ureg_IF(shader, frame_pred, &label);
          /*
@@ -876,29 +885,6 @@ flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE ty
 
    handler = &r->mbtype_handlers[type];
 
-   if(ref_frames == 2) {
-
-      r->textures.individual.ref[0] = r->past->texture;
-      r->textures.individual.ref[1] = r->future->texture;
-      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, r->past);
-      r->sampler_views.individual.ref[1] = find_or_create_sampler_view(r, r->future);
-
-   } else if(ref_frames == 1) {
-
-      struct pipe_surface *ref;
-
-      if(type == VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED ||
-         type == VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED)
-         ref = r->future;
-      else
-         ref = r->past;
-
-      r->textures.individual.ref[0] = ref->texture;
-      r->sampler_views.individual.ref[0] = find_or_create_sampler_view(r, ref);
-   }
-
-   r->pipe->set_fragment_sampler_views(r->pipe, 3 + ref_frames, r->sampler_views.all);
-   r->pipe->bind_fragment_sampler_states(r->pipe, 3 + ref_frames, r->samplers.all);
    r->pipe->bind_vs_state(r->pipe, handler->vs);
    r->pipe->bind_fs_state(r->pipe, handler->fs);
 
@@ -1022,6 +1008,8 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
    }
    info.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
    info.frame_pred = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? 1.0f : 0.0f;
+   info.bkwd_pred = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD ? 1.0f : 0.0f;
+
    vl_vb_add_block(&handler->pos, (float*)&info);
 
    get_motion_vectors(mb, mv);
@@ -1229,6 +1217,19 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
    renderer->pipe->set_vertex_buffers(renderer->pipe, 6, renderer->vertex_bufs.all);
    renderer->pipe->bind_vertex_elements_state(renderer->pipe, renderer->vertex_elems_state);
 
+   if (renderer->past) {
+      renderer->textures.individual.ref[0] = renderer->past->texture;
+      renderer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, renderer->past);
+   }
+
+   if (renderer->future) {
+      renderer->textures.individual.ref[1] = renderer->future->texture;
+      renderer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, renderer->future);
+   }
+
+   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, renderer->sampler_views.all);
+   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 5, renderer->samplers.all);
+
    for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) {
       if (num_verts[i] > 0)
          vb_start += flush_mbtype_handler(renderer, i, vb_start, num_verts[i]);
-- 
cgit v1.2.3


From 050e7035f246f3271f53b962d743ff1a864c6f33 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 19:18:55 +0100
Subject: [g3dvl] move macro block type handling into shaders part 3

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 54 +++++++++++++++++-------
 1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index d6104972ba7..bd612f3b1bd 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -339,15 +339,15 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 }
 
 static struct ureg_dst
-fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frames)
+fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src frame_pred, bkwd_pred; //, ref_frames, ;
-   struct ureg_src tc[4], sampler[ref_frames];
-   struct ureg_dst ref[ref_frames], t_tc, result;
-   unsigned i, label;
+   struct ureg_src ref_frames, frame_pred, bkwd_pred;
+   struct ureg_src tc[4], sampler[2];
+   struct ureg_dst ref[2], t_tc, result;
+   unsigned i, intra_label, bi_label, label;
 
+   ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT);
    frame_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FRAME_PRED, TGSI_INTERPOLATE_CONSTANT);
-   //ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT);
    bkwd_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED, TGSI_INTERPOLATE_CONSTANT);
 
    for (i = 0; i < 4; ++i)
@@ -360,9 +360,13 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frame
 
    result = ureg_DECL_temporary(shader);
 
-   if (ref_frames == 0)
+   ureg_SEQ(shader, ureg_writemask(result, TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, -1.0f));
+   ureg_IF(shader, ureg_scalar(ureg_src(result), TGSI_SWIZZLE_X), &intra_label);
       ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
-   else if (ref_frames == 1) {
+
+   ureg_ELSE(shader, &intra_label);
+   ureg_IF(shader, ureg_scalar(ref_frames, TGSI_SWIZZLE_X), &bi_label);
+
       t_tc = ureg_DECL_temporary(shader);
       ureg_IF(shader, frame_pred, &label);
 
@@ -390,7 +394,8 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frame
 
       ureg_release_temporary(shader, t_tc);
 
-   } else if (ref_frames == 2) {
+   ureg_ELSE(shader, &bi_label);
+
       ureg_IF(shader, frame_pred, &label);
          /*
           * ref[0..1] = tex(tc[0..1], sampler[0..1])
@@ -420,16 +425,18 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field, unsigned ref_frame
       ureg_ENDIF(shader);
 
       ureg_LRP(shader, result, ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
-   }
 
-   for (i = 0; i < ref_frames; ++i)
+   ureg_ENDIF(shader);
+   ureg_ENDIF(shader);
+
+   for (i = 0; i < 2; ++i)
       ureg_release_temporary(shader, ref[i]);
 
    return result;
 }
 
 static void *
-create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames)
+create_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
    struct ureg_dst result;
@@ -445,7 +452,7 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned ref_frames)
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
 
-   result = fetch_ref(shader, field, ref_frames);
+   result = fetch_ref(shader, field);
 
    ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(result));
 
@@ -472,7 +479,7 @@ init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE typ
    handler = &r->mbtype_handlers[type];
 
    handler->vs = create_vert_shader(r);
-   handler->fs = create_frag_shader(r, ref_frames);
+   handler->fs = create_frag_shader(r);
 
    if (handler->vs == NULL || handler->fs == NULL)
       return false;
@@ -702,7 +709,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    /* frame=0.0f field=1.0f */
    vertex_elems[VS_I_FRAME_PRED].src_format = PIPE_FORMAT_R32_FLOAT;
 
-   /* intra=0.0f forward/backward=1.0f bi=-1.0f */
+   /* intra=-1.0f forward/backward=1.0f bi=0.0f */
    vertex_elems[VS_I_REF_FRAMES].src_format = PIPE_FORMAT_R32_FLOAT;
 
    /* forward=0.0f backward=1.0f */
@@ -1009,6 +1016,23 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
    info.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
    info.frame_pred = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? 1.0f : 0.0f;
    info.bkwd_pred = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD ? 1.0f : 0.0f;
+   switch (mb->mb_type) {
+      case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
+         info.ref_frames = -1.0f;
+         break;
+
+      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
+         info.ref_frames = 1.0f;
+         break;
+        
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
+         info.ref_frames = 0.0f;
+         break;
+
+      default:
+         assert(0);
+   }
 
    vl_vb_add_block(&handler->pos, (float*)&info);
 
-- 
cgit v1.2.3


From 8df88ca10fbfc56fca715fe60ab2edecd15d5736 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 19:41:01 +0100
Subject: [g3dvl] remove mb type handler structure

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 184 ++++-------------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  17 +--
 2 files changed, 38 insertions(+), 163 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index bd612f3b1bd..ed2de7930e7 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -464,61 +464,6 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r)
    return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static bool
-init_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type)
-{
-   unsigned ref_frames, mv_per_frame;
-   struct vl_mc_mbtype_handler *handler;
-   unsigned i;
-
-   assert(r);
-
-   ref_frames = const_mbtype_config[type][0];
-   mv_per_frame = const_mbtype_config[type][1];
-
-   handler = &r->mbtype_handlers[type];
-
-   handler->vs = create_vert_shader(r);
-   handler->fs = create_frag_shader(r);
-
-   if (handler->vs == NULL || handler->fs == NULL)
-      return false;
-
-   if (!vl_vb_init(&handler->pos, r->macroblocks_per_batch, sizeof(struct vertex_stream_0) / sizeof(float)))
-      return false;
-
-   for (i = 0; i < ref_frames * mv_per_frame; ++i) {
-      if (!vl_vb_init(&handler->mv[i], r->macroblocks_per_batch, sizeof(struct vertex2f) / sizeof(float)))
-         return false;
-   }
-
-   return true;
-}
-
-static void
-cleanup_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type)
-{
-   unsigned ref_frames, mv_per_frame;
-   struct vl_mc_mbtype_handler *handler;
-   unsigned i;
-
-   assert(r);
-
-   ref_frames = const_mbtype_config[type][0];
-   mv_per_frame = const_mbtype_config[type][1];
-
-   handler = &r->mbtype_handlers[type];
-
-   r->pipe->delete_vs_state(r->pipe, handler->vs);
-   r->pipe->delete_fs_state(r->pipe, handler->fs);
-
-   vl_vb_cleanup(&handler->pos);
-
-   for (i = 0; i < ref_frames * mv_per_frame; ++i)
-      vl_vb_cleanup(&handler->mv[i]);
-}
-
-
 static bool
 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
@@ -734,8 +679,19 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    if (r->vertex_elems_state == NULL)
       return false;
 
-   for(i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i)
-      init_mbtype_handler(r, i);
+   r->vs = create_vert_shader(r);
+   r->fs = create_frag_shader(r);
+
+   if (r->vs == NULL || r->fs == NULL)
+      return false;
+
+   if (!vl_vb_init(&r->pos, r->macroblocks_per_batch, sizeof(struct vertex_stream_0) / sizeof(float)))
+      return false;
+
+   for (i = 0; i < 4; ++i) {
+      if (!vl_vb_init(&r->mv[i], r->macroblocks_per_batch, sizeof(struct vertex2f) / sizeof(float)))
+         return false;
+   }
 
    return true;
 }
@@ -753,8 +709,13 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
       pipe_resource_reference(&r->textures.all[i], NULL);
    }
 
-   for(i = 0; i<VL_NUM_MACROBLOCK_TYPES; ++i)
-      cleanup_mbtype_handler(r, i);
+   r->pipe->delete_vs_state(r->pipe, r->vs);
+   r->pipe->delete_fs_state(r->pipe, r->fs);
+
+   vl_vb_cleanup(&r->pos);
+
+   for (i = 0; i < 4; ++i)
+      vl_vb_cleanup(&r->mv[i]);
 
    vl_idct_unmap_buffers(&r->idct_luma, &r->idct_y);
    vl_idct_unmap_buffers(&r->idct_chroma, &r->idct_cb);
@@ -770,34 +731,8 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
    r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state);
 }
 
-static enum VL_MACROBLOCK_TYPE
-get_macroblock_type(struct pipe_mpeg12_macroblock *mb)
-{
-   assert(mb);
-
-   switch (mb->mb_type) {
-      case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
-         return VL_MACROBLOCK_TYPE_INTRA;
-      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
-         return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
-            VL_MACROBLOCK_TYPE_FWD_FRAME_PRED : VL_MACROBLOCK_TYPE_FWD_FIELD_PRED;
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-         return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
-            VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED : VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED;
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-         return mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ?
-            VL_MACROBLOCK_TYPE_BI_FRAME_PRED : VL_MACROBLOCK_TYPE_BI_FIELD_PRED;
-      default:
-         assert(0);
-   }
-
-   /* Unreachable */
-   return -1;
-}
-
 static void
-upload_vertex_stream(struct vl_mpeg12_mc_renderer *r,
-                      unsigned num_macroblocks[VL_NUM_MACROBLOCK_TYPES])
+upload_vertex_stream(struct vl_mpeg12_mc_renderer *r)
 {
    struct vertex_stream_0 *pos;
    struct vertex2f *mv[4];
@@ -807,7 +742,6 @@ upload_vertex_stream(struct vl_mpeg12_mc_renderer *r,
    unsigned i, j;
 
    assert(r);
-   assert(num_macroblocks);
 
    pos = (struct vertex_stream_0 *)pipe_buffer_map
    (
@@ -826,25 +760,9 @@ upload_vertex_stream(struct vl_mpeg12_mc_renderer *r,
          &buf_transfer[i + 1]
       );
 
-   for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) {
-      struct vl_mc_mbtype_handler *handler = &r->mbtype_handlers[i];
-      unsigned count = vl_vb_upload(&handler->pos, pos);
-      if (count > 0) {
-         pos += count;
-
-         unsigned ref_frames, mv_per_frame;
-
-         ref_frames = const_mbtype_config[i][0];
-         mv_per_frame = const_mbtype_config[i][1];
-
-         for (j = 0; j < ref_frames * mv_per_frame; ++j)
-            vl_vb_upload(&handler->mv[j], mv[j]);
-
-         for (j = 0; j < 4; ++j)
-            mv[j] += count;
-      }
-      num_macroblocks[i] = count;
-   }
+   vl_vb_upload(&r->pos, pos);
+   for (j = 0; j < 4; ++j)
+      vl_vb_upload(&r->mv[j], mv[j]);
 
    pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.pos.buffer, buf_transfer[0]);
    for (i = 0; i < 4; ++i)
@@ -878,27 +796,6 @@ static struct pipe_sampler_view
    return sampler_view;
 }
 
-static unsigned
-flush_mbtype_handler(struct vl_mpeg12_mc_renderer *r, enum VL_MACROBLOCK_TYPE type,
-                     unsigned vb_start, unsigned num_macroblocks)
-{
-   unsigned ref_frames, mv_per_frame;
-   struct vl_mc_mbtype_handler *handler;
-
-   assert(r);
-
-   ref_frames = const_mbtype_config[type][0];
-   mv_per_frame = const_mbtype_config[type][1];
-
-   handler = &r->mbtype_handlers[type];
-
-   r->pipe->bind_vs_state(r->pipe, handler->vs);
-   r->pipe->bind_fs_state(r->pipe, handler->fs);
-
-   util_draw_arrays(r->pipe, PIPE_PRIM_QUADS, vb_start, num_macroblocks);
-   return num_macroblocks;
-}
-
 static void
 get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4])
 {
@@ -984,26 +881,14 @@ static void
 grab_vectors(struct vl_mpeg12_mc_renderer *r,
              struct pipe_mpeg12_macroblock *mb)
 {
-   enum VL_MACROBLOCK_TYPE type;
-   struct vl_mc_mbtype_handler *handler;
    struct vertex2f mv[4];
    struct vertex_stream_0 info;
 
-   unsigned ref_frames, mv_per_frame;
-   unsigned i, j, pos;
+   unsigned i, j;
 
    assert(r);
    assert(mb);
 
-   type = get_macroblock_type(mb);
-
-   ref_frames = const_mbtype_config[type][0];
-   mv_per_frame = const_mbtype_config[type][1];
-
-   handler = &r->mbtype_handlers[type];
-
-   pos = handler->pos.num_verts;
-
    info.pos.x = mb->mbx;
    info.pos.y = mb->mby;
    for ( i = 0; i < 2; ++i) {
@@ -1034,11 +919,11 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
          assert(0);
    }
 
-   vl_vb_add_block(&handler->pos, (float*)&info);
+   vl_vb_add_block(&r->pos, (float*)&info);
 
    get_motion_vectors(mb, mv);
-   for ( j = 0; j < ref_frames * mv_per_frame; ++j )
-      vl_vb_add_block(&handler->mv[j], (float*)&mv[j]);
+   for ( j = 0; j < 4; ++j )
+      vl_vb_add_block(&r->mv[j], (float*)&mv[j]);
 }
 
 static void
@@ -1215,9 +1100,6 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
 void
 vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
 {
-   unsigned num_verts[VL_NUM_MACROBLOCK_TYPES] = { 0 };
-   unsigned vb_start = 0, i;
-
    assert(renderer);
    assert(renderer->num_macroblocks <= renderer->macroblocks_per_batch);
 
@@ -1232,7 +1114,7 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
    vl_idct_flush(&renderer->idct_chroma, &renderer->idct_cr);
    vl_idct_flush(&renderer->idct_chroma, &renderer->idct_cb);
 
-   upload_vertex_stream(renderer, num_verts);
+   upload_vertex_stream(renderer);
 
    renderer->fb_state.cbufs[0] = renderer->surface;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
@@ -1250,14 +1132,12 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
       renderer->textures.individual.ref[1] = renderer->future->texture;
       renderer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, renderer->future);
    }
-
    renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, renderer->sampler_views.all);
    renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 5, renderer->samplers.all);
 
-   for (i = 0; i < VL_NUM_MACROBLOCK_TYPES; ++i) {
-      if (num_verts[i] > 0)
-         vb_start += flush_mbtype_handler(renderer, i, vb_start, num_verts[i]);
-   }
+   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
+   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs);
+   util_draw_arrays(renderer->pipe, PIPE_PRIM_QUADS, 0, renderer->num_macroblocks * 4);
 
    renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, renderer->fence);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 3e37c11009f..e0349886fa2 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -59,14 +59,6 @@ enum VL_MACROBLOCK_TYPE
    VL_NUM_MACROBLOCK_TYPES
 };
 
-struct vl_mc_mbtype_handler
-{
-   void *vs, *fs;
-
-   struct vl_vertex_buffer pos;
-   struct vl_vertex_buffer mv[4];
-};
-
 struct vl_mpeg12_mc_renderer
 {
    struct pipe_context *pipe;
@@ -83,9 +75,14 @@ struct vl_mpeg12_mc_renderer
    struct vl_idct_buffer idct_y, idct_cb, idct_cr;
 
    void *vertex_elems_state;
-
    void *rs_state;
 
+   void *vs, *fs;
+
+   struct vl_vertex_buffer pos;
+   struct vl_vertex_buffer mv[4];
+
+
    union
    {
       void *all[5];
@@ -98,8 +95,6 @@ struct vl_mpeg12_mc_renderer
       struct { struct pipe_sampler_view *y, *cb, *cr, *ref[2]; } individual;
    } sampler_views;
 
-   struct vl_mc_mbtype_handler mbtype_handlers[VL_NUM_MACROBLOCK_TYPES];
-
    union
    {
       struct pipe_resource *all[5];
-- 
cgit v1.2.3


From 1482b9a7f39ec8875fcd4137c35b3cb9ac0c0934 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 20:48:34 +0100
Subject: [g3dvl] remove shadow buffering of vertex buffers

---
 src/gallium/auxiliary/vl/vl_idct.c               | 24 ++-----
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 81 ++++++------------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h | 13 ----
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     | 77 +++++++++++++---------
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     | 19 ++++--
 5 files changed, 86 insertions(+), 128 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 168e6e88b2b..0a81134a789 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -464,14 +464,13 @@ init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    buffer->vertex_bufs.individual.quad.buffer_offset = idct->quad.buffer_offset;
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, idct->quad.buffer);
 
-   buffer->vertex_bufs.individual.pos = vl_vb_create_buffer(idct->pipe, idct->max_blocks, idct->vertex_buffer_stride);
+   buffer->vertex_bufs.individual.pos = vl_vb_init(
+      &buffer->blocks, idct->pipe, idct->max_blocks, 2,
+      idct->vertex_buffer_stride);
 
    if(buffer->vertex_bufs.individual.pos.buffer == NULL)
       return false;
 
-   if (!vl_vb_init(&buffer->blocks, idct->max_blocks, 2))
-      return false;
-
    return true;
 }
 
@@ -668,6 +667,8 @@ vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    );
 
    buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer);
+
+   vl_vb_map(&buffer->blocks, idct->pipe);
 }
 
 void
@@ -699,28 +700,17 @@ vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    idct->pipe->transfer_unmap(idct->pipe, buffer->tex_transfer);
    idct->pipe->transfer_destroy(idct->pipe, buffer->tex_transfer);
+   vl_vb_unmap(&buffer->blocks, idct->pipe);
 }
 
 void
 vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   struct pipe_transfer *vec_transfer;
-   void *vectors;
    unsigned num_verts;
 
    assert(idct);
 
-   vectors = pipe_buffer_map
-   (
-      idct->pipe,
-      buffer->vertex_bufs.individual.pos.buffer,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &vec_transfer
-   );
-
-   num_verts = vl_vb_upload(&buffer->blocks, vectors);
-
-   pipe_buffer_unmap(idct->pipe, buffer->vertex_bufs.individual.pos.buffer, vec_transfer);
+   num_verts = vl_vb_restart(&buffer->blocks);
 
    if(num_verts > 0) {
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index ed2de7930e7..66e4fc0a07a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -95,16 +95,6 @@ enum VS_OUTPUT
    VS_O_MV3
 };
 
-static const unsigned const_mbtype_config[VL_NUM_MACROBLOCK_TYPES][2] = {
-   [VL_MACROBLOCK_TYPE_INTRA]           = { 0, 0 },
-   [VL_MACROBLOCK_TYPE_FWD_FRAME_PRED]  = { 1, 1 },
-   [VL_MACROBLOCK_TYPE_FWD_FIELD_PRED]  = { 1, 2 },
-   [VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED] = { 1, 1 },
-   [VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED] = { 1, 2 },
-   [VL_MACROBLOCK_TYPE_BI_FRAME_PRED]   = { 2, 1 },
-   [VL_MACROBLOCK_TYPE_BI_FIELD_PRED]   = { 2, 2 }
-};
-
 static void *
 create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
@@ -662,15 +652,19 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
 
    stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 9, 1);
 
-   r->vertex_bufs.individual.pos = vl_vb_create_buffer(
-      r->pipe, r->macroblocks_per_batch, stride);
+   r->vertex_bufs.individual.pos = vl_vb_init(
+      &r->pos, r->pipe, r->macroblocks_per_batch, 
+      sizeof(struct vertex_stream_0) / sizeof(float),
+      stride);
 
    for (i = 0; i < 4; ++i) {
       /* motion vector 0..4 element */
       vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R32G32_FLOAT;
       stride = vl_vb_element_helper(&vertex_elems[VS_I_MV0 + i], 1, i + 2);
-      r->vertex_bufs.individual.mv[i] = vl_vb_create_buffer(
-         r->pipe, r->macroblocks_per_batch, stride);
+      r->vertex_bufs.individual.mv[i] = vl_vb_init(
+         &r->mv[i], r->pipe, r->macroblocks_per_batch,
+         sizeof(struct vertex2f) / sizeof(float),
+         stride);
    }
 
    r->vertex_elems_state = r->pipe->create_vertex_elements_state(
@@ -685,14 +679,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    if (r->vs == NULL || r->fs == NULL)
       return false;
 
-   if (!vl_vb_init(&r->pos, r->macroblocks_per_batch, sizeof(struct vertex_stream_0) / sizeof(float)))
-      return false;
-
-   for (i = 0; i < 4; ++i) {
-      if (!vl_vb_init(&r->mv[i], r->macroblocks_per_batch, sizeof(struct vertex2f) / sizeof(float)))
-         return false;
-   }
-
    return true;
 }
 
@@ -731,44 +717,6 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
    r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state);
 }
 
-static void
-upload_vertex_stream(struct vl_mpeg12_mc_renderer *r)
-{
-   struct vertex_stream_0 *pos;
-   struct vertex2f *mv[4];
-
-   struct pipe_transfer *buf_transfer[5];
-
-   unsigned i, j;
-
-   assert(r);
-
-   pos = (struct vertex_stream_0 *)pipe_buffer_map
-   (
-      r->pipe,
-      r->vertex_bufs.individual.pos.buffer,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buf_transfer[0]
-   );
-
-   for (i = 0; i < 4; ++i)
-      mv[i] = (struct vertex2f *)pipe_buffer_map
-      (
-         r->pipe,
-         r->vertex_bufs.individual.mv[i].buffer,
-         PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-         &buf_transfer[i + 1]
-      );
-
-   vl_vb_upload(&r->pos, pos);
-   for (j = 0; j < 4; ++j)
-      vl_vb_upload(&r->mv[j], mv[j]);
-
-   pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.pos.buffer, buf_transfer[0]);
-   for (i = 0; i < 4; ++i)
-      pipe_buffer_unmap(r->pipe, r->vertex_bufs.individual.mv[i].buffer, buf_transfer[i + 1]);
-}
-
 static struct pipe_sampler_view
 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
 {
@@ -1100,6 +1048,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
 void
 vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
 {
+   unsigned i;
+
    assert(renderer);
    assert(renderer->num_macroblocks <= renderer->macroblocks_per_batch);
 
@@ -1114,7 +1064,12 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
    vl_idct_flush(&renderer->idct_chroma, &renderer->idct_cr);
    vl_idct_flush(&renderer->idct_chroma, &renderer->idct_cb);
 
-   upload_vertex_stream(renderer);
+   vl_vb_unmap(&renderer->pos, renderer->pipe);
+   vl_vb_restart(&renderer->pos);
+   for(i = 0; i < 4; ++i) {
+      vl_vb_unmap(&renderer->mv[i], renderer->pipe);
+      vl_vb_restart(&renderer->mv[i]);
+   }
 
    renderer->fb_state.cbufs[0] = renderer->surface;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
@@ -1145,5 +1100,9 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
    vl_idct_map_buffers(&renderer->idct_chroma, &renderer->idct_cr);
    vl_idct_map_buffers(&renderer->idct_chroma, &renderer->idct_cb);
 
+   vl_vb_map(&renderer->pos, renderer->pipe);
+   for(i = 0; i < 4; ++i)
+      vl_vb_map(&renderer->mv[i], renderer->pipe);
+
    renderer->num_macroblocks = 0;
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index e0349886fa2..e12ac50ae0d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -46,19 +46,6 @@ enum VL_MPEG12_MC_RENDERER_BUFFER_MODE
    VL_MPEG12_MC_RENDERER_BUFFER_PICTURE /* Larger batches, more memory */
 };
 
-enum VL_MACROBLOCK_TYPE
-{
-   VL_MACROBLOCK_TYPE_INTRA,
-   VL_MACROBLOCK_TYPE_FWD_FRAME_PRED,
-   VL_MACROBLOCK_TYPE_FWD_FIELD_PRED,
-   VL_MACROBLOCK_TYPE_BKWD_FRAME_PRED,
-   VL_MACROBLOCK_TYPE_BKWD_FIELD_PRED,
-   VL_MACROBLOCK_TYPE_BI_FRAME_PRED,
-   VL_MACROBLOCK_TYPE_BI_FIELD_PRED,
-
-   VL_NUM_MACROBLOCK_TYPES
-};
-
 struct vl_mpeg12_mc_renderer
 {
    struct pipe_context *pipe;
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 4647685053b..4182bad784b 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -96,24 +96,6 @@ vl_vb_get_quad_vertex_element()
    return element;
 }
 
-struct pipe_vertex_buffer
-vl_vb_create_buffer(struct pipe_context *pipe, unsigned max_blocks, unsigned stride)
-{
-   struct pipe_vertex_buffer buf;
-
-   buf.stride = stride;
-   buf.max_index = 4 * max_blocks - 1;
-   buf.buffer_offset = 0;
-   buf.buffer = pipe_buffer_create
-   (
-      pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      stride * 4 * max_blocks
-   );
-
-   return buf;
-}
-
 unsigned
 vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
                               unsigned vertex_buffer_index)
@@ -132,30 +114,63 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
    return offset;
 }
 
-bool
-vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks, unsigned num_elements)
+struct pipe_vertex_buffer
+vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
+           unsigned max_blocks, unsigned num_elements, unsigned stride)
 {
+   struct pipe_vertex_buffer buf;
+
    assert(buffer);
 
    buffer->num_verts = 0;
    buffer->num_elements = num_elements;
-   buffer->buffer = MALLOC(max_blocks * num_elements * sizeof(float) * 4);
-   return buffer->buffer != NULL;
+
+   buf.stride = stride;
+   buf.max_index = 4 * max_blocks - 1;
+   buf.buffer_offset = 0;
+   buf.buffer = pipe_buffer_create
+   (
+      pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      stride * 4 * max_blocks
+   );
+
+   pipe_resource_reference(&buffer->resource, buf.buffer);
+
+   vl_vb_map(buffer, pipe);
+
+   return buf;
 }
 
-unsigned
-vl_vb_upload(struct vl_vertex_buffer *buffer, void *dst)
+void
+vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 {
-   unsigned todo;
+   assert(buffer && pipe);
 
-   assert(buffer);
+   buffer->vectors = pipe_buffer_map
+   (
+      pipe,
+      buffer->resource,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buffer->transfer
+   );
+}
 
-   todo = buffer->num_verts;
-   buffer->num_verts = 0;
+void
+vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
+{
+   assert(buffer && pipe);
 
-   if(todo)
-      memcpy(dst, buffer->buffer, sizeof(float) * buffer->num_elements * todo);
+   pipe_buffer_unmap(pipe, buffer->resource, buffer->transfer);
+}
+
+unsigned
+vl_vb_restart(struct vl_vertex_buffer *buffer)
+{
+   assert(buffer);
 
+   unsigned todo = buffer->num_verts;
+   buffer->num_verts = 0;
    return todo;
 }
 
@@ -164,5 +179,5 @@ vl_vb_cleanup(struct vl_vertex_buffer *buffer)
 {
    assert(buffer);
 
-   FREE(buffer->buffer);
+   pipe_resource_reference(&buffer->resource, NULL);
 }
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index dae80fa767b..e312515dc30 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -35,19 +35,24 @@ struct vl_vertex_buffer
 {
    unsigned num_verts;
    unsigned num_elements;
-   float *buffer;
+   struct pipe_resource *resource;
+   struct pipe_transfer *transfer;
+   float *vectors;
 };
 
 struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks);
 
 struct pipe_vertex_element vl_vb_get_quad_vertex_element();
 
-struct pipe_vertex_buffer vl_vb_create_buffer(struct pipe_context *pipe, unsigned max_blocks, unsigned stride);
-
 unsigned vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
                               unsigned vertex_buffer_index);
 
-bool vl_vb_init(struct vl_vertex_buffer *buffer, unsigned max_blocks, unsigned num_elements);
+struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer,
+                                     struct pipe_context *pipe,
+                                     unsigned max_blocks, unsigned num_elements,
+                                     unsigned stride);
+
+void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
 static inline void
 vl_vb_add_block(struct vl_vertex_buffer *buffer, float *elements)
@@ -58,13 +63,15 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, float *elements)
    assert(buffer);
 
    for(i = 0; i < 4; ++i) {
-      pos = buffer->buffer + buffer->num_verts * buffer->num_elements;
+      pos = buffer->vectors + buffer->num_verts * buffer->num_elements;
       memcpy(pos, elements, sizeof(float) * buffer->num_elements);
       buffer->num_verts++;
    }
 }
 
-unsigned vl_vb_upload(struct vl_vertex_buffer *buffer, void *dst);
+void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
+
+unsigned vl_vb_restart(struct vl_vertex_buffer *buffer);
 
 void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
 
-- 
cgit v1.2.3


From 877edb67859cffad44b1cea1c3392911e86cdef6 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 21:28:38 +0100
Subject: [g3dvl] replace IF THEN ELSE with CMP statement

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 83 ++++++++++--------------
 1 file changed, 36 insertions(+), 47 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 66e4fc0a07a..445e2aae234 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -239,10 +239,12 @@ static struct ureg_dst
 calc_field(struct ureg_program *shader)
 {
    struct ureg_dst tmp;
-   struct ureg_src line;
+   struct ureg_src line, frame_pred;
 
    tmp = ureg_DECL_temporary(shader);
+
    line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
+   frame_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FRAME_PRED, TGSI_INTERPOLATE_CONSTANT);
 
    /*
     * line.xy going from 0 to 8 in steps of 0.5
@@ -252,15 +254,22 @@ calc_field(struct ureg_program *shader)
     * tmp.z = tmp.z >= 0.5 ? 1 : 0
     * tmp.xy = line >= 4 ? 1 : 0
     * tmp.w = line.z ? tmp.z : tmp.y
+    * tmp.z = frame_pred ? 0.0f : tmp.z
     */
    ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(line, TGSI_SWIZZLE_Y));
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line, ureg_imm2f(shader, BLOCK_WIDTH / 2, BLOCK_HEIGHT / 2));
+
    ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W),
             ureg_negate(ureg_scalar(line, TGSI_SWIZZLE_Z)),
             ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z),
             ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
 
+   ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z),
+            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
+            ureg_imm1f(shader, 0.0f),
+            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
+
    return tmp;
 }
 
@@ -331,13 +340,12 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 static struct ureg_dst
 fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src ref_frames, frame_pred, bkwd_pred;
+   struct ureg_src ref_frames, bkwd_pred;
    struct ureg_src tc[4], sampler[2];
    struct ureg_dst ref[2], t_tc, result;
    unsigned i, intra_label, bi_label, label;
 
    ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT);
-   frame_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FRAME_PRED, TGSI_INTERPOLATE_CONSTANT);
    bkwd_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED, TGSI_INTERPOLATE_CONSTANT);
 
    for (i = 0; i < 4; ++i)
@@ -358,23 +366,12 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
    ureg_IF(shader, ureg_scalar(ref_frames, TGSI_SWIZZLE_X), &bi_label);
 
       t_tc = ureg_DECL_temporary(shader);
-      ureg_IF(shader, frame_pred, &label);
-
-         /*
-          * result = tex(tc[0], sampler[0])
-          */
-         ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY), tc[0]);
-
-      ureg_ELSE(shader, &label);
-
-         /*
-          * result = tex(field.y ? tc[1] : tc[0], sampler[0])
-          */
-         ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
-            tc[1], tc[0]);
-
-      ureg_ENDIF(shader);
+      /*
+       * result = tex(field.z ? tc[1] : tc[0], sampler[0])
+       */
+      ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
+               ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
+               tc[1], tc[0]);
 
       ureg_IF(shader, bkwd_pred, &label);
          ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[1]);
@@ -386,33 +383,25 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 
    ureg_ELSE(shader, &bi_label);
 
-      ureg_IF(shader, frame_pred, &label);
-         /*
-          * ref[0..1] = tex(tc[0..1], sampler[0..1])
-          */
-         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, tc[0], sampler[0]);
-         ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
-      ureg_ELSE(shader, &label);
-         t_tc = ureg_DECL_temporary(shader);
-
-         /*
-          * if (field.y)
-          *    ref[0..1] = tex(tc[0..1], sampler[0..1])
-          * else
-          *    ref[0..1] = tex(tc[2..3], sampler[0..1])
-          */
-         ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
-            tc[1], tc[0]);
-         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
+      t_tc = ureg_DECL_temporary(shader);
 
-         ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
-            tc[3], tc[2]);
-         ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[1]);
+      /*
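+       * field.z selects the texcoord used for each reference
+       * (ureg_CMP picks its second source when the first is < 0):
+       *    ref[0] = tex(field.z ? tc[1] : tc[0], sampler[0])
+       *    ref[1] = tex(field.z ? tc[3] : tc[2], sampler[1])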
+       */
+      ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
+         ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
+         tc[1], tc[0]);
+      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
+
+      ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
+         ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
+         tc[3], tc[2]);
+      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[1]);
 
-         ureg_release_temporary(shader, t_tc);
-      ureg_ENDIF(shader);
+      ureg_release_temporary(shader, t_tc);
 
       ureg_LRP(shader, result, ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
 
@@ -752,8 +741,8 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4])
       {
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
 
-            mv[1].x = mb->pmv[0][1][0];
-            mv[1].y = mb->pmv[0][1][1];
+            mv[2].x = mb->pmv[0][1][0];
+            mv[2].y = mb->pmv[0][1][1];
 
          } else {
             mv[2].x = mb->pmv[0][1][0];
-- 
cgit v1.2.3


From 3b2ef2d007193f8a5f6979b378ee3c952ff39f7f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 22:35:30 +0100
Subject: [g3dvl] move buffer mapping/unmapping out of flush

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 59 +++++++++++++++---------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  4 ++
 src/gallium/drivers/softpipe/sp_video_context.c  |  4 ++
 3 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 445e2aae234..4b3d2d6d551 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -989,8 +989,23 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 }
 
 void
-vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
-                                         *renderer,
+vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer)
+{
+   unsigned i;
+
+   assert(renderer);
+
+   vl_idct_map_buffers(&renderer->idct_luma, &renderer->idct_y);
+   vl_idct_map_buffers(&renderer->idct_chroma, &renderer->idct_cr);
+   vl_idct_map_buffers(&renderer->idct_chroma, &renderer->idct_cb);
+
+   vl_vb_map(&renderer->pos, renderer->pipe);
+   for(i = 0; i < 4; ++i)
+      vl_vb_map(&renderer->mv[i], renderer->pipe);
+}
+
+void
+vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
                                          struct pipe_surface *surface,
                                          struct pipe_surface *past,
                                          struct pipe_surface *future,
@@ -1024,16 +1039,29 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       num_macroblocks -= num_to_submit;
 
       if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
+         vl_mpeg12_mc_unmap_buffer(renderer);
          vl_mpeg12_mc_renderer_flush(renderer);
-
-         /* Next time we get this surface it may have new ref frames */
-         pipe_surface_reference(&renderer->surface, NULL);
-         pipe_surface_reference(&renderer->past, NULL);
-         pipe_surface_reference(&renderer->future, NULL);
+         vl_mpeg12_mc_map_buffer(renderer);
       }
    }
 }
 
+void
+vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer)
+{
+   unsigned i;
+
+   assert(renderer);
+
+   vl_idct_unmap_buffers(&renderer->idct_luma, &renderer->idct_y);
+   vl_idct_unmap_buffers(&renderer->idct_chroma, &renderer->idct_cr);
+   vl_idct_unmap_buffers(&renderer->idct_chroma, &renderer->idct_cb);
+
+   vl_vb_unmap(&renderer->pos, renderer->pipe);
+   for(i = 0; i < 4; ++i)
+      vl_vb_unmap(&renderer->mv[i], renderer->pipe);
+}
+
 void
 vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
 {
@@ -1045,20 +1073,13 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
    if (renderer->num_macroblocks == 0)
       return;
 
-   vl_idct_unmap_buffers(&renderer->idct_luma, &renderer->idct_y);
-   vl_idct_unmap_buffers(&renderer->idct_chroma, &renderer->idct_cr);
-   vl_idct_unmap_buffers(&renderer->idct_chroma, &renderer->idct_cb);
-
    vl_idct_flush(&renderer->idct_luma, &renderer->idct_y);
    vl_idct_flush(&renderer->idct_chroma, &renderer->idct_cr);
    vl_idct_flush(&renderer->idct_chroma, &renderer->idct_cb);
 
-   vl_vb_unmap(&renderer->pos, renderer->pipe);
    vl_vb_restart(&renderer->pos);
-   for(i = 0; i < 4; ++i) {
-      vl_vb_unmap(&renderer->mv[i], renderer->pipe);
+   for(i = 0; i < 4; ++i)
       vl_vb_restart(&renderer->mv[i]);
-   }
 
    renderer->fb_state.cbufs[0] = renderer->surface;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
@@ -1085,13 +1106,5 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
 
    renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, renderer->fence);
 
-   vl_idct_map_buffers(&renderer->idct_luma, &renderer->idct_y);
-   vl_idct_map_buffers(&renderer->idct_chroma, &renderer->idct_cr);
-   vl_idct_map_buffers(&renderer->idct_chroma, &renderer->idct_cb);
-
-   vl_vb_map(&renderer->pos, renderer->pipe);
-   for(i = 0; i < 4; ++i)
-      vl_vb_map(&renderer->mv[i], renderer->pipe);
-
    renderer->num_macroblocks = 0;
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index e12ac50ae0d..2f35bcbcd45 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -112,6 +112,8 @@ bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
 
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
+void vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer);
+
 void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
                                               struct pipe_surface *surface,
                                               struct pipe_surface *past,
@@ -120,6 +122,8 @@ void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *rend
                                               struct pipe_mpeg12_macroblock *mpeg12_macroblocks,
                                               struct pipe_fence_handle **fence);
 
+void vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer);
+
 void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer);
 
 #endif /* vl_mpeg12_mc_renderer_h */
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 35636d8475c..166c09e20be 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -282,7 +282,9 @@ sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
    assert(dst_surface);
    assert(dst_area);
 
+   vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer);
    vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer);
+   vl_mpeg12_mc_map_buffer(&ctx->mc_renderer);
 
    vl_compositor_render(&ctx->compositor, src_surface,
                         picture_type, src_area, dst_surface, dst_area, fence);
@@ -328,7 +330,9 @@ sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
    assert(dt);
 
    if (ctx->decode_target != dt) {
+      vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer);
       vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer);
+      vl_mpeg12_mc_map_buffer(&ctx->mc_renderer);
       pipe_surface_reference(&ctx->decode_target, dt);
    }
 }
-- 
cgit v1.2.3


From 1e3f5e9520940592dae1c37bb4c8d3fc156aa5d5 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 8 Dec 2010 23:37:57 +0100
Subject: [g3dvl] split mc code into state and buffer

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 332 ++++++++++++-----------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  31 ++-
 src/gallium/drivers/softpipe/sp_video_context.c  |  26 +-
 src/gallium/drivers/softpipe/sp_video_context.h  |   1 +
 4 files changed, 224 insertions(+), 166 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 4b3d2d6d551..97548491147 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -533,36 +533,19 @@ static bool
 init_buffers(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_resource *idct_matrix;
-   struct pipe_resource template;
    struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
-   struct pipe_sampler_view sampler_view;
 
    const unsigned mbw =
       align(r->buffer_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
    const unsigned mbh =
       align(r->buffer_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
 
-   unsigned i, stride;
+   unsigned i, chroma_width, chroma_height;
 
    assert(r);
 
    r->macroblocks_per_batch =
       mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
-   r->num_macroblocks = 0;
-
-   memset(&template, 0, sizeof(struct pipe_resource));
-   template.target = PIPE_TEXTURE_2D;
-   /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
-   template.format = PIPE_FORMAT_R16_SNORM;
-   template.last_level = 0;
-   template.width0 = r->buffer_width;
-   template.height0 = r->buffer_height;
-   template.depth0 = 1;
-   template.usage = PIPE_USAGE_DYNAMIC;
-   template.bind = PIPE_BIND_SAMPLER_VIEW;
-   template.flags = 0;
-
-   r->textures.individual.y = r->pipe->screen->resource_create(r->pipe->screen, &template);
 
    if (!(idct_matrix = vl_idct_upload_matrix(r->pipe)))
       return false;
@@ -570,47 +553,24 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    if (!vl_idct_init(&r->idct_luma, r->pipe, r->buffer_width, r->buffer_height, idct_matrix))
       return false;
 
-   if (!vl_idct_init_buffer(&r->idct_luma, &r->idct_y, r->textures.individual.y))
-      return false;
-
-   vl_idct_map_buffers(&r->idct_luma, &r->idct_y);
-
    if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      template.width0 = r->buffer_width / 2;
-      template.height0 = r->buffer_height / 2;
+      chroma_width = r->buffer_width / 2;
+      chroma_height = r->buffer_height / 2;
+   } else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      chroma_width = r->buffer_width;
+      chroma_height = r->buffer_height / 2;
+   } else {
+      chroma_width = r->buffer_width;
+      chroma_height = r->buffer_height;
    }
-   else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
-      template.height0 = r->buffer_height / 2;
-
-   r->textures.individual.cb =
-      r->pipe->screen->resource_create(r->pipe->screen, &template);
-   r->textures.individual.cr =
-      r->pipe->screen->resource_create(r->pipe->screen, &template);
-
-   if(!vl_idct_init(&r->idct_chroma, r->pipe, template.width0, template.height0, idct_matrix))
-      return false;
 
-   if (!vl_idct_init_buffer(&r->idct_chroma, &r->idct_cb, r->textures.individual.cb))
+   if(!vl_idct_init(&r->idct_chroma, r->pipe, chroma_width, chroma_height, idct_matrix))
       return false;
 
-   vl_idct_map_buffers(&r->idct_chroma, &r->idct_cb);
-
-   if (!vl_idct_init_buffer(&r->idct_chroma, &r->idct_cr, r->textures.individual.cr))
-      return false;
-
-   vl_idct_map_buffers(&r->idct_chroma, &r->idct_cr);
-
-   for (i = 0; i < 3; ++i) {
-      u_sampler_view_default_template(&sampler_view,
-                                      r->textures.all[i],
-                                      r->textures.all[i]->format);
-      r->sampler_views.all[i] = r->pipe->create_sampler_view(r->pipe, r->textures.all[i], &sampler_view);
-   }
-
    memset(&vertex_elems, 0, sizeof(vertex_elems));
 
    vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
-   r->vertex_bufs.individual.quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
+   r->quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
 
    /* Position element */
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
@@ -639,21 +599,12 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    /* forward=0.0f backward=1.0f */
    vertex_elems[VS_I_BKWD_PRED].src_format = PIPE_FORMAT_R32_FLOAT;
 
-   stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 9, 1);
-
-   r->vertex_bufs.individual.pos = vl_vb_init(
-      &r->pos, r->pipe, r->macroblocks_per_batch, 
-      sizeof(struct vertex_stream_0) / sizeof(float),
-      stride);
+   r->pos_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 9, 1);
 
    for (i = 0; i < 4; ++i) {
       /* motion vector 0..4 element */
       vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R32G32_FLOAT;
-      stride = vl_vb_element_helper(&vertex_elems[VS_I_MV0 + i], 1, i + 2);
-      r->vertex_bufs.individual.mv[i] = vl_vb_init(
-         &r->mv[i], r->pipe, r->macroblocks_per_batch,
-         sizeof(struct vertex2f) / sizeof(float),
-         stride);
+      r->mv_stride[i] = vl_vb_element_helper(&vertex_elems[VS_I_MV0 + i], 1, i + 2);
    }
 
    r->vertex_elems_state = r->pipe->create_vertex_elements_state(
@@ -674,32 +625,11 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
 static void
 cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
 {
-   unsigned i;
-
    assert(r);
 
-   for (i = 0; i < 3; ++i) {
-      pipe_sampler_view_reference(&r->sampler_views.all[i], NULL);
-      pipe_resource_reference(&r->vertex_bufs.all[i].buffer, NULL);
-      pipe_resource_reference(&r->textures.all[i], NULL);
-   }
-
    r->pipe->delete_vs_state(r->pipe, r->vs);
    r->pipe->delete_fs_state(r->pipe, r->fs);
 
-   vl_vb_cleanup(&r->pos);
-
-   for (i = 0; i < 4; ++i)
-      vl_vb_cleanup(&r->mv[i]);
-
-   vl_idct_unmap_buffers(&r->idct_luma, &r->idct_y);
-   vl_idct_unmap_buffers(&r->idct_chroma, &r->idct_cb);
-   vl_idct_unmap_buffers(&r->idct_chroma, &r->idct_cr);
-
-   vl_idct_cleanup_buffer(&r->idct_luma, &r->idct_y);
-   vl_idct_cleanup_buffer(&r->idct_chroma, &r->idct_cb);
-   vl_idct_cleanup_buffer(&r->idct_chroma, &r->idct_cr);
-
    vl_idct_cleanup(&r->idct_luma);
    vl_idct_cleanup(&r->idct_chroma);
 
@@ -816,6 +746,7 @@ empty_block(enum pipe_video_chroma_format chroma_format,
 
 static void
 grab_vectors(struct vl_mpeg12_mc_renderer *r,
+             struct vl_mpeg12_mc_buffer *buffer,
              struct pipe_mpeg12_macroblock *mb)
 {
    struct vertex2f mv[4];
@@ -856,16 +787,18 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
          assert(0);
    }
 
-   vl_vb_add_block(&r->pos, (float*)&info);
+   vl_vb_add_block(&buffer->pos, (float*)&info);
 
    get_motion_vectors(mb, mv);
    for ( j = 0; j < 4; ++j )
-      vl_vb_add_block(&r->mv[j], (float*)&mv[j]);
+      vl_vb_add_block(&buffer->mv[j], (float*)&mv[j]);
 }
 
 static void
-grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
-            enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
+grab_blocks(struct vl_mpeg12_mc_renderer *r,
+            struct vl_mpeg12_mc_buffer *buffer,
+            unsigned mbx, unsigned mby,
+            unsigned cbp, short *blocks)
 {
    unsigned tb = 0;
    unsigned x, y;
@@ -876,7 +809,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
          if (!empty_block(r->chroma_format, cbp, 0, x, y)) {
-            vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, blocks);
+            vl_idct_add_block(&buffer->idct_y, mbx * 2 + x, mby * 2 + y, blocks);
             blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
          }
       }
@@ -888,9 +821,9 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
    for (tb = 1; tb < 3; ++tb) {
       if (!empty_block(r->chroma_format, cbp, tb, 0, 0)) {
          if(tb == 1)
-            vl_idct_add_block(&r->idct_cb, mbx, mby, blocks);
+            vl_idct_add_block(&buffer->idct_cb, mbx, mby, blocks);
          else
-            vl_idct_add_block(&r->idct_cr, mbx, mby, blocks);
+            vl_idct_add_block(&buffer->idct_cr, mbx, mby, blocks);
          blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
       }
    }
@@ -898,17 +831,18 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
 
 static void
 grab_macroblock(struct vl_mpeg12_mc_renderer *r,
+                struct vl_mpeg12_mc_buffer *buffer,
                 struct pipe_mpeg12_macroblock *mb)
 {
    assert(r);
    assert(mb);
    assert(mb->blocks);
-   assert(r->num_macroblocks < r->macroblocks_per_batch);
+   assert(buffer->num_macroblocks < r->macroblocks_per_batch);
 
-   grab_vectors(r, mb);
-   grab_blocks(r, mb->mbx, mb->mby, mb->dct_type, mb->cbp, mb->blocks);
+   grab_vectors(r, buffer, mb);
+   grab_blocks(r, buffer, mb->mbx, mb->mby, mb->cbp, mb->blocks);
 
-   ++r->num_macroblocks;
+   ++buffer->num_macroblocks;
 }
 
 static void
@@ -959,11 +893,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (!init_buffers(renderer))
       goto error_buffers;
 
-   renderer->surface = NULL;
-   renderer->past = NULL;
-   renderer->future = NULL;
-   renderer->num_macroblocks = 0;
-
    return true;
 
 error_buffers:
@@ -982,129 +911,230 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
    util_delete_keymap(renderer->texview_map, renderer->pipe);
    cleanup_pipe_state(renderer);
    cleanup_buffers(renderer);
+}
+
+bool
+vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
+{
+   struct pipe_resource template;
+   struct pipe_sampler_view sampler_view;
+
+   unsigned i;
+
+   assert(renderer && buffer);
 
-   pipe_surface_reference(&renderer->surface, NULL);
-   pipe_surface_reference(&renderer->past, NULL);
-   pipe_surface_reference(&renderer->future, NULL);
+   buffer->surface = NULL;
+   buffer->past = NULL;
+   buffer->future = NULL;
+   buffer->num_macroblocks = 0;
+
+   memset(&template, 0, sizeof(struct pipe_resource));
+   template.target = PIPE_TEXTURE_2D;
+   /* TODO: Accommodate HW that can't do this and also for cases when this isn't precise enough */
+   template.format = PIPE_FORMAT_R16_SNORM;
+   template.last_level = 0;
+   template.width0 = renderer->buffer_width;
+   template.height0 = renderer->buffer_height;
+   template.depth0 = 1;
+   template.usage = PIPE_USAGE_DYNAMIC;
+   template.bind = PIPE_BIND_SAMPLER_VIEW;
+   template.flags = 0;
+
+   buffer->textures.individual.y = renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
+
+   if (!vl_idct_init_buffer(&renderer->idct_luma, &buffer->idct_y, buffer->textures.individual.y))
+      return false;
+
+   if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      template.width0 = renderer->buffer_width / 2;
+      template.height0 = renderer->buffer_height / 2;
+   }
+   else if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
+      template.height0 = renderer->buffer_height / 2;
+
+   buffer->textures.individual.cb =
+      renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
+   buffer->textures.individual.cr =
+      renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
+
+   if (!vl_idct_init_buffer(&renderer->idct_chroma, &buffer->idct_cb, buffer->textures.individual.cb))
+      return false;
+
+   if (!vl_idct_init_buffer(&renderer->idct_chroma, &buffer->idct_cr, buffer->textures.individual.cr))
+      return false;
+
+   for (i = 0; i < 3; ++i) {
+      u_sampler_view_default_template(&sampler_view,
+                                      buffer->textures.all[i],
+                                      buffer->textures.all[i]->format);
+      buffer->sampler_views.all[i] = renderer->pipe->create_sampler_view(
+         renderer->pipe, buffer->textures.all[i], &sampler_view);
+   }
+
+   buffer->vertex_bufs.individual.quad.stride = renderer->quad.stride;
+   buffer->vertex_bufs.individual.quad.max_index = renderer->quad.max_index;
+   buffer->vertex_bufs.individual.quad.buffer_offset = renderer->quad.buffer_offset;
+   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer);
+
+   buffer->vertex_bufs.individual.pos = vl_vb_init(
+      &buffer->pos, renderer->pipe, renderer->macroblocks_per_batch, 
+      sizeof(struct vertex_stream_0) / sizeof(float),
+      renderer->pos_stride);
+
+   for (i = 0; i < 4; ++i) {
+      buffer->vertex_bufs.individual.mv[i] = vl_vb_init(
+         &buffer->mv[i], renderer->pipe, renderer->macroblocks_per_batch,
+         sizeof(struct vertex2f) / sizeof(float),
+         renderer->mv_stride[i]);
+   }
+
+   return true;
 }
 
 void
-vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer)
+vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
 {
    unsigned i;
 
-   assert(renderer);
+   assert(renderer && buffer);
+
+   for (i = 0; i < 3; ++i) {
+      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
+      pipe_resource_reference(&buffer->vertex_bufs.all[i].buffer, NULL);
+      pipe_resource_reference(&buffer->textures.all[i], NULL);
+   }
 
-   vl_idct_map_buffers(&renderer->idct_luma, &renderer->idct_y);
-   vl_idct_map_buffers(&renderer->idct_chroma, &renderer->idct_cr);
-   vl_idct_map_buffers(&renderer->idct_chroma, &renderer->idct_cb);
+   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
+   vl_vb_cleanup(&buffer->pos);
+   for (i = 0; i < 4; ++i)
+      vl_vb_cleanup(&buffer->mv[i]);
 
-   vl_vb_map(&renderer->pos, renderer->pipe);
+   vl_idct_cleanup_buffer(&renderer->idct_luma, &buffer->idct_y);
+   vl_idct_cleanup_buffer(&renderer->idct_chroma, &buffer->idct_cb);
+   vl_idct_cleanup_buffer(&renderer->idct_chroma, &buffer->idct_cr);
+
+   pipe_surface_reference(&buffer->surface, NULL);
+   pipe_surface_reference(&buffer->past, NULL);
+   pipe_surface_reference(&buffer->future, NULL);
+}
+
+void
+vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
+{
+   unsigned i;
+
+   assert(renderer && buffer);
+
+   vl_idct_map_buffers(&renderer->idct_luma, &buffer->idct_y);
+   vl_idct_map_buffers(&renderer->idct_chroma, &buffer->idct_cr);
+   vl_idct_map_buffers(&renderer->idct_chroma, &buffer->idct_cb);
+
+   vl_vb_map(&buffer->pos, renderer->pipe);
    for(i = 0; i < 4; ++i)
-      vl_vb_map(&renderer->mv[i], renderer->pipe);
+      vl_vb_map(&buffer->mv[i], renderer->pipe);
 }
 
 void
 vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
+                                         struct vl_mpeg12_mc_buffer *buffer,
                                          struct pipe_surface *surface,
                                          struct pipe_surface *past,
                                          struct pipe_surface *future,
                                          unsigned num_macroblocks,
-                                         struct pipe_mpeg12_macroblock
-                                         *mpeg12_macroblocks,
+                                         struct pipe_mpeg12_macroblock *mpeg12_macroblocks,
                                          struct pipe_fence_handle **fence)
 {
-   assert(renderer);
+   assert(renderer && buffer);
    assert(surface);
    assert(num_macroblocks);
    assert(mpeg12_macroblocks);
 
-   if (surface != renderer->surface) {
-      pipe_surface_reference(&renderer->surface, surface);
-      pipe_surface_reference(&renderer->past, past);
-      pipe_surface_reference(&renderer->future, future);
-      renderer->fence = fence;
+   if (surface != buffer->surface) {
+      pipe_surface_reference(&buffer->surface, surface);
+      pipe_surface_reference(&buffer->past, past);
+      pipe_surface_reference(&buffer->future, future);
+      buffer->fence = fence;
    }
 
    while (num_macroblocks) {
-      unsigned left_in_batch = renderer->macroblocks_per_batch - renderer->num_macroblocks;
+      unsigned left_in_batch = renderer->macroblocks_per_batch - buffer->num_macroblocks;
       unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
       unsigned i;
 
       for (i = 0; i < num_to_submit; ++i) {
          assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
-         grab_macroblock(renderer, &mpeg12_macroblocks[i]);
+         grab_macroblock(renderer, buffer, &mpeg12_macroblocks[i]);
       }
 
       num_macroblocks -= num_to_submit;
 
-      if (renderer->num_macroblocks == renderer->macroblocks_per_batch) {
-         vl_mpeg12_mc_unmap_buffer(renderer);
-         vl_mpeg12_mc_renderer_flush(renderer);
-         vl_mpeg12_mc_map_buffer(renderer);
+      if (buffer->num_macroblocks == renderer->macroblocks_per_batch) {
+         vl_mpeg12_mc_unmap_buffer(renderer, buffer);
+         vl_mpeg12_mc_renderer_flush(renderer, buffer);
+         vl_mpeg12_mc_map_buffer(renderer, buffer);
       }
    }
 }
 
 void
-vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer)
+vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
 {
    unsigned i;
 
-   assert(renderer);
+   assert(renderer && buffer);
 
-   vl_idct_unmap_buffers(&renderer->idct_luma, &renderer->idct_y);
-   vl_idct_unmap_buffers(&renderer->idct_chroma, &renderer->idct_cr);
-   vl_idct_unmap_buffers(&renderer->idct_chroma, &renderer->idct_cb);
+   vl_idct_unmap_buffers(&renderer->idct_luma, &buffer->idct_y);
+   vl_idct_unmap_buffers(&renderer->idct_chroma, &buffer->idct_cr);
+   vl_idct_unmap_buffers(&renderer->idct_chroma, &buffer->idct_cb);
 
-   vl_vb_unmap(&renderer->pos, renderer->pipe);
+   vl_vb_unmap(&buffer->pos, renderer->pipe);
    for(i = 0; i < 4; ++i)
-      vl_vb_unmap(&renderer->mv[i], renderer->pipe);
+      vl_vb_unmap(&buffer->mv[i], renderer->pipe);
 }
 
 void
-vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer)
+vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
 {
    unsigned i;
 
-   assert(renderer);
-   assert(renderer->num_macroblocks <= renderer->macroblocks_per_batch);
+   assert(renderer && buffer);
+   assert(buffer->num_macroblocks <= renderer->macroblocks_per_batch);
 
-   if (renderer->num_macroblocks == 0)
+   if (buffer->num_macroblocks == 0)
       return;
 
-   vl_idct_flush(&renderer->idct_luma, &renderer->idct_y);
-   vl_idct_flush(&renderer->idct_chroma, &renderer->idct_cr);
-   vl_idct_flush(&renderer->idct_chroma, &renderer->idct_cb);
+   vl_idct_flush(&renderer->idct_luma, &buffer->idct_y);
+   vl_idct_flush(&renderer->idct_chroma, &buffer->idct_cr);
+   vl_idct_flush(&renderer->idct_chroma, &buffer->idct_cb);
 
-   vl_vb_restart(&renderer->pos);
+   vl_vb_restart(&buffer->pos);
    for(i = 0; i < 4; ++i)
-      vl_vb_restart(&renderer->mv[i]);
+      vl_vb_restart(&buffer->mv[i]);
 
-   renderer->fb_state.cbufs[0] = renderer->surface;
+   renderer->fb_state.cbufs[0] = buffer->surface;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
-   renderer->pipe->set_vertex_buffers(renderer->pipe, 6, renderer->vertex_bufs.all);
+   renderer->pipe->set_vertex_buffers(renderer->pipe, 6, buffer->vertex_bufs.all);
    renderer->pipe->bind_vertex_elements_state(renderer->pipe, renderer->vertex_elems_state);
 
-   if (renderer->past) {
-      renderer->textures.individual.ref[0] = renderer->past->texture;
-      renderer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, renderer->past);
+   if (buffer->past) {
+      buffer->textures.individual.ref[0] = buffer->past->texture;
+      buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->past);
    }
 
-   if (renderer->future) {
-      renderer->textures.individual.ref[1] = renderer->future->texture;
-      renderer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, renderer->future);
+   if (buffer->future) {
+      buffer->textures.individual.ref[1] = buffer->future->texture;
+      buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->future);
    }
-   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, renderer->sampler_views.all);
+   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, buffer->sampler_views.all);
    renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 5, renderer->samplers.all);
 
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs);
-   util_draw_arrays(renderer->pipe, PIPE_PRIM_QUADS, 0, renderer->num_macroblocks * 4);
+   util_draw_arrays(renderer->pipe, PIPE_PRIM_QUADS, 0, buffer->num_macroblocks * 4);
 
-   renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, renderer->fence);
+   renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, buffer->fence);
 
-   renderer->num_macroblocks = 0;
+   buffer->num_macroblocks = 0;
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 2f35bcbcd45..157cbee1362 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -55,20 +55,20 @@ struct vl_mpeg12_mc_renderer
    enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode;
    unsigned macroblocks_per_batch;
 
+   unsigned pos_stride;
+   unsigned mv_stride[4];
+
    struct pipe_viewport_state viewport;
    struct pipe_framebuffer_state fb_state;
 
    struct vl_idct idct_luma, idct_chroma;
-   struct vl_idct_buffer idct_y, idct_cb, idct_cr;
 
    void *vertex_elems_state;
    void *rs_state;
 
    void *vs, *fs;
 
-   struct vl_vertex_buffer pos;
-   struct vl_vertex_buffer mv[4];
-
+   struct pipe_vertex_buffer quad;
 
    union
    {
@@ -76,6 +76,16 @@ struct vl_mpeg12_mc_renderer
       struct { void *y, *cb, *cr, *ref[2]; } individual;
    } samplers;
 
+   struct keymap *texview_map;
+};
+
+struct vl_mpeg12_mc_buffer
+{
+   struct vl_idct_buffer idct_y, idct_cb, idct_cr;
+
+   struct vl_vertex_buffer pos;
+   struct vl_vertex_buffer mv[4];
+
    union
    {
       struct pipe_sampler_view *all[5];
@@ -99,8 +109,6 @@ struct vl_mpeg12_mc_renderer
    struct pipe_surface *surface, *past, *future;
    struct pipe_fence_handle **fence;
    unsigned num_macroblocks;
-
-   struct keymap *texview_map;
 };
 
 bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
@@ -112,9 +120,14 @@ bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
 
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
-void vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer);
+bool vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
+
+void vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
+
+void vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
 
 void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
+                                              struct vl_mpeg12_mc_buffer *buffer,
                                               struct pipe_surface *surface,
                                               struct pipe_surface *past,
                                               struct pipe_surface *future,
@@ -122,8 +135,8 @@ void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *rend
                                               struct pipe_mpeg12_macroblock *mpeg12_macroblocks,
                                               struct pipe_fence_handle **fence);
 
-void vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer);
+void vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
 
-void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer);
+void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
 
 #endif /* vl_mpeg12_mc_renderer_h */
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 166c09e20be..442489d3ffc 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -54,6 +54,8 @@ sp_mpeg12_destroy(struct pipe_video_context *vpipe)
 
    pipe_surface_reference(&ctx->decode_target, NULL);
    vl_compositor_cleanup(&ctx->compositor);
+   vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
+   vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
    vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
    ctx->pipe->destroy(ctx->pipe);
 
@@ -120,6 +122,7 @@ sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
    assert(ctx->decode_target);
 
    vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer,
+                                            &ctx->mc_buffer,
                                             ctx->decode_target,
                                             past, future, num_macroblocks,
                                             mpeg12_macroblocks, fence);
@@ -282,9 +285,9 @@ sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
    assert(dst_surface);
    assert(dst_area);
 
-   vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer);
-   vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer);
-   vl_mpeg12_mc_map_buffer(&ctx->mc_renderer);
+   vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
+   vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &ctx->mc_buffer);
+   vl_mpeg12_mc_map_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
 
    vl_compositor_render(&ctx->compositor, src_surface,
                         picture_type, src_area, dst_surface, dst_area, fence);
@@ -330,9 +333,9 @@ sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
    assert(dt);
 
    if (ctx->decode_target != dt) {
-      vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer);
-      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer);
-      vl_mpeg12_mc_map_buffer(&ctx->mc_renderer);
+      vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
+      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &ctx->mc_buffer);
+      vl_mpeg12_mc_map_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
       pipe_surface_reference(&ctx->decode_target, dt);
    }
 }
@@ -487,7 +490,17 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
       return NULL;
    }
 
+   if (!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &ctx->mc_buffer)) {
+      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+      ctx->pipe->destroy(ctx->pipe);
+      FREE(ctx);
+      return NULL;
+   }
+
+   vl_mpeg12_mc_map_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
+
    if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) {
+      vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
@@ -496,6 +509,7 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
 
    if (!init_pipe_state(ctx)) {
       vl_compositor_cleanup(&ctx->compositor);
+      vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
index dbf1bc1d8dd..7d8407d0c9d 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.h
+++ b/src/gallium/drivers/softpipe/sp_video_context.h
@@ -41,6 +41,7 @@ struct sp_mpeg12_context
    struct pipe_context *pipe;
    struct pipe_surface *decode_target;
    struct vl_mpeg12_mc_renderer mc_renderer;
+   struct vl_mpeg12_mc_buffer mc_buffer;
    struct vl_compositor compositor;
 
    void *rast;
-- 
cgit v1.2.3


From 7c4887f5ae642131d7895da5bffda77a6287c6d4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 9 Dec 2010 22:27:03 +0100
Subject: [g3dvl] fix idct matrix upload

---
 src/gallium/auxiliary/vl/vl_idct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 0a81134a789..087ac6e71bb 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -496,7 +496,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    struct pipe_box rect =
    {
       0, 0, 0,
-      BLOCK_WIDTH,
+      BLOCK_WIDTH / 4,
       BLOCK_HEIGHT,
       1
    };
-- 
cgit v1.2.3


From 25cdc79f32f9e4242d53a22a4debe80ca6a66348 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 9 Dec 2010 22:28:31 +0100
Subject: [g3dvl] fix buffer handling in mc code

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 97548491147..c688f2c5140 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1054,6 +1054,10 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
       pipe_surface_reference(&buffer->past, past);
       pipe_surface_reference(&buffer->future, future);
       buffer->fence = fence;
+   } else {
+      /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
+      assert(buffer->past == past);
+      assert(buffer->future == future);
    }
 
    while (num_macroblocks) {
@@ -1071,6 +1075,9 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
       if (buffer->num_macroblocks == renderer->macroblocks_per_batch) {
          vl_mpeg12_mc_unmap_buffer(renderer, buffer);
          vl_mpeg12_mc_renderer_flush(renderer, buffer);
+         pipe_surface_reference(&buffer->surface, surface);
+         pipe_surface_reference(&buffer->past, past);
+         pipe_surface_reference(&buffer->future, future);
          vl_mpeg12_mc_map_buffer(renderer, buffer);
       }
    }
@@ -1121,12 +1128,19 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    if (buffer->past) {
       buffer->textures.individual.ref[0] = buffer->past->texture;
       buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->past);
+   } else {
+      buffer->textures.individual.ref[0] = buffer->surface->texture;
+      buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->surface);
    }
 
    if (buffer->future) {
       buffer->textures.individual.ref[1] = buffer->future->texture;
       buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->future);
+   } else {
+      buffer->textures.individual.ref[1] = buffer->surface->texture;
+      buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->surface);
    }
+
    renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, buffer->sampler_views.all);
    renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 5, renderer->samplers.all);
 
@@ -1136,5 +1150,10 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
 
    renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, buffer->fence);
 
+   /* Next time we get this surface it may have new ref frames */
+   pipe_surface_reference(&buffer->surface, NULL);
+   pipe_surface_reference(&buffer->past, NULL);
+   pipe_surface_reference(&buffer->future, NULL);
+
    buffer->num_macroblocks = 0;
 }
-- 
cgit v1.2.3


From 00e60387fc3a5ec2de9a2dd312427eff067ec47a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 9 Dec 2010 22:34:43 +0100
Subject: [g3dvl] try a different buffer approach

---
 src/gallium/drivers/softpipe/sp_video_context.c | 94 +++++++++++++++++++++----
 src/gallium/drivers/softpipe/sp_video_context.h |  3 +-
 2 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 442489d3ffc..34fa64e178e 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -31,12 +31,78 @@
 #include "sp_video_context.h"
 #include <util/u_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_keymap.h>
 #include <util/u_rect.h>
 #include <util/u_video.h>
 #include <util/u_surface.h>
 #include "sp_public.h"
 #include "sp_texture.h"
 
+#define NUM_BUFFERS 8
+
+static void
+flush_buffer(struct sp_mpeg12_context *ctx)
+{
+   assert(ctx);
+
+   if(ctx->mc_buffer != NULL) {
+
+      vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer, ctx->mc_buffer);
+      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, ctx->mc_buffer);
+
+      ctx->mc_buffer = NULL;
+   }
+}
+
+static void
+rotate_buffer(struct sp_mpeg12_context *ctx)
+{
+   static unsigned key = 0;
+   struct vl_mpeg12_mc_buffer *buffer;
+
+   assert(ctx);
+
+   flush_buffer(ctx);
+
+   buffer = (struct vl_mpeg12_mc_buffer*)util_keymap_lookup(ctx->buffer_map, &key);
+   if (!buffer) {
+      boolean added_to_map;
+
+      buffer = CALLOC_STRUCT(vl_mpeg12_mc_buffer);
+      if (buffer == NULL)
+         return;
+
+      if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, buffer)) {
+         FREE(buffer);
+         return;
+      }
+
+      added_to_map = util_keymap_insert(ctx->buffer_map, &key, buffer, ctx);
+      assert(added_to_map);
+   }
+   ++key;
+   key %= NUM_BUFFERS;
+   ctx->mc_buffer = buffer;
+
+   vl_mpeg12_mc_map_buffer(&ctx->mc_renderer, ctx->mc_buffer);
+}
+
+static void
+delete_buffer(const struct keymap *map,
+              const void *key, void *data,
+              void *user)
+{
+   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)user;
+   struct vl_mpeg12_mc_buffer *buf = (struct vl_mpeg12_mc_buffer*)data;
+
+   assert(map);
+   assert(key);
+   assert(data);
+   assert(user);
+
+   vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, buf);
+}
+
 static void
 sp_mpeg12_destroy(struct pipe_video_context *vpipe)
 {
@@ -44,6 +110,8 @@ sp_mpeg12_destroy(struct pipe_video_context *vpipe)
 
    assert(vpipe);
 
+   flush_buffer(ctx);
+
    /* Asserted in softpipe_delete_fs_state() for some reason */
    ctx->pipe->bind_vs_state(ctx->pipe, NULL);
    ctx->pipe->bind_fs_state(ctx->pipe, NULL);
@@ -54,8 +122,7 @@ sp_mpeg12_destroy(struct pipe_video_context *vpipe)
 
    pipe_surface_reference(&ctx->decode_target, NULL);
    vl_compositor_cleanup(&ctx->compositor);
-   vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
-   vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
+   util_delete_keymap(ctx->buffer_map, ctx);
    vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
    ctx->pipe->destroy(ctx->pipe);
 
@@ -120,9 +187,10 @@ sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
    assert(macroblocks);
    assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
    assert(ctx->decode_target);
+   assert(ctx->mc_buffer);
 
    vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer,
-                                            &ctx->mc_buffer,
+                                            ctx->mc_buffer,
                                             ctx->decode_target,
                                             past, future, num_macroblocks,
                                             mpeg12_macroblocks, fence);
@@ -285,9 +353,7 @@ sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
    assert(dst_surface);
    assert(dst_area);
 
-   vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
-   vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &ctx->mc_buffer);
-   vl_mpeg12_mc_map_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
+   flush_buffer(ctx);
 
    vl_compositor_render(&ctx->compositor, src_surface,
                         picture_type, src_area, dst_surface, dst_area, fence);
@@ -332,10 +398,9 @@ sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
    assert(vpipe);
    assert(dt);
 
-   if (ctx->decode_target != dt) {
-      vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
-      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &ctx->mc_buffer);
-      vl_mpeg12_mc_map_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
+   if (ctx->decode_target != dt || ctx->mc_buffer == NULL) {
+      rotate_buffer(ctx);
+
       pipe_surface_reference(&ctx->decode_target, dt);
    }
 }
@@ -490,17 +555,16 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
       return NULL;
    }
 
-   if (!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &ctx->mc_buffer)) {
+   ctx->buffer_map = util_new_keymap(sizeof(unsigned), -1, delete_buffer);
+   if (!ctx->buffer_map) {
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
    }
 
-   vl_mpeg12_mc_map_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
-
    if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) {
-      vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
+      util_delete_keymap(ctx->buffer_map, ctx);
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
@@ -509,7 +573,7 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
 
    if (!init_pipe_state(ctx)) {
       vl_compositor_cleanup(&ctx->compositor);
-      vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &ctx->mc_buffer);
+      util_delete_keymap(ctx->buffer_map, ctx);
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
index 7d8407d0c9d..9b60bad6b8f 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.h
+++ b/src/gallium/drivers/softpipe/sp_video_context.h
@@ -41,7 +41,8 @@ struct sp_mpeg12_context
    struct pipe_context *pipe;
    struct pipe_surface *decode_target;
    struct vl_mpeg12_mc_renderer mc_renderer;
-   struct vl_mpeg12_mc_buffer mc_buffer;
+   struct keymap *buffer_map;
+   struct vl_mpeg12_mc_buffer *mc_buffer;
    struct vl_compositor compositor;
 
    void *rast;
-- 
cgit v1.2.3


From cf234984f436b77deefebd281c2eecd494398940 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 10 Dec 2010 00:13:32 +0100
Subject: [g3dvl] avoid some ELSE blocks

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 84 ++++++++++--------------
 1 file changed, 36 insertions(+), 48 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index c688f2c5140..18b1c59cdaa 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -191,7 +191,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Z), 
       ureg_scalar(interlaced, TGSI_SWIZZLE_X));
 
-   ureg_IF(shader, interlaced, &label);
+   ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_IF(shader, ureg_scalar(interlaced, TGSI_SWIZZLE_X), &label);
 
       ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
       ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
@@ -200,11 +202,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
       ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), scale);
 
-   ureg_ELSE(shader, &label);
-
-      ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-      ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-
    ureg_ENDIF(shader);
    ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
@@ -311,12 +308,11 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
             eb[1], eb[0]);
 
    /* r600g is ignoring TGSI_INTERPOLATE_CONSTANT, just workaround this */
-   ureg_SGE(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), ureg_src(t_eb_info), ureg_imm1f(shader, 0.5f));
+   ureg_SLT(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), ureg_src(t_eb_info), ureg_imm1f(shader, 0.5f));
 
+   ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.0f));
    for (i = 0; i < 3; ++i) {
       ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), TGSI_SWIZZLE_X + i), &label);
-         ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_imm1f(shader, 0.0f));
-      ureg_ELSE(shader, &label);
 
          /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
          if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
@@ -342,7 +338,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 {
    struct ureg_src ref_frames, bkwd_pred;
    struct ureg_src tc[4], sampler[2];
-   struct ureg_dst ref[2], t_tc, result;
+   struct ureg_dst ref[2], tmp, result;
    unsigned i, intra_label, bi_label, label;
 
    ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT);
@@ -356,60 +352,52 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
       ref[i] = ureg_DECL_temporary(shader);
    }
 
+   tmp = ureg_DECL_temporary(shader);
    result = ureg_DECL_temporary(shader);
 
-   ureg_SEQ(shader, ureg_writemask(result, TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, -1.0f));
-   ureg_IF(shader, ureg_scalar(ureg_src(result), TGSI_SWIZZLE_X), &intra_label);
-      ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
+   ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
 
-   ureg_ELSE(shader, &intra_label);
-   ureg_IF(shader, ureg_scalar(ref_frames, TGSI_SWIZZLE_X), &bi_label);
-
-      t_tc = ureg_DECL_temporary(shader);
-      /*
-       * result = tex(field.z ? tc[1] : tc[0], sampler[0])
-       */
-      ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, 0.0f));
+   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), &intra_label);
+      ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
                ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
                tc[1], tc[0]);
 
-      ureg_IF(shader, bkwd_pred, &label);
-         ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[1]);
-      ureg_ELSE(shader, &label);
-         ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
-      ureg_ENDIF(shader);
-
-      ureg_release_temporary(shader, t_tc);
-
-   ureg_ELSE(shader, &bi_label);
+      ureg_IF(shader, ureg_scalar(ref_frames, TGSI_SWIZZLE_X), &bi_label);
 
-      t_tc = ureg_DECL_temporary(shader);
+         /*
+          * result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 1 : 0])
+          */
+         ureg_IF(shader, bkwd_pred, &label);
+            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
+         ureg_ELSE(shader, &label);
+            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]);
+         ureg_ENDIF(shader);
 
-      /*
-       * if (field.z)
-       *    ref[0..1] = tex(tc[0..1], sampler[0..1])
-       * else
-       *    ref[0..1] = tex(tc[2..3], sampler[0..1])
-       */
-      ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
-         ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
-         tc[1], tc[0]);
-      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[0]);
+      ureg_ELSE(shader, &bi_label);
 
-      ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
-         ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
-         tc[3], tc[2]);
-      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(t_tc), sampler[1]);
+         /*
+          * if (field.z)
+          *    ref[0..1] = tex(tc[0..1], sampler[0..1])
+          * else
+          *    ref[0..1] = tex(tc[2..3], sampler[0..1])
+          */
+         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]);
 
-      ureg_release_temporary(shader, t_tc);
+         ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
+            tc[3], tc[2]);
+         ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
 
-      ureg_LRP(shader, result, ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
+         ureg_LRP(shader, result, ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X),
+            ureg_src(ref[0]), ureg_src(ref[1]));
 
-   ureg_ENDIF(shader);
+      ureg_ENDIF(shader);
    ureg_ENDIF(shader);
 
    for (i = 0; i < 2; ++i)
       ureg_release_temporary(shader, ref[i]);
+   ureg_release_temporary(shader, tmp);
 
    return result;
 }
-- 
cgit v1.2.3


From 1fb4bf84d588f78056e3ba5849410895841fe01d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 10 Dec 2010 00:29:59 +0100
Subject: [g3dvl] move frame prediction handling from fragment into vertex shader

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 27 ++++++++++++------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 18b1c59cdaa..6ebbef38163 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -86,7 +86,6 @@ enum VS_OUTPUT
    VS_O_TEX2,
    VS_O_EB_0,
    VS_O_EB_1,
-   VS_O_FRAME_PRED,
    VS_O_REF_FRAMES,
    VS_O_BKWD_PRED,
    VS_O_MV0,
@@ -104,7 +103,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_src interlaced, frame_pred, ref_frames, bkwd_pred;
    struct ureg_dst t_vpos, t_vtex, t_vmv;
    struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4];
-   struct ureg_dst o_frame_pred, o_ref_frames, o_bkwd_pred;
+   struct ureg_dst o_ref_frames, o_bkwd_pred;
    unsigned i, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -133,7 +132,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);   
    o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0);
    o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1);
-   o_frame_pred = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FRAME_PRED);
    o_ref_frames = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES);
    o_bkwd_pred = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED);
    
@@ -212,7 +210,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
             ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
             eb[1][1], eb[1][0]);
 
-   ureg_MOV(shader, ureg_writemask(o_frame_pred, TGSI_WRITEMASK_X), frame_pred);
    ureg_MOV(shader, ureg_writemask(o_ref_frames, TGSI_WRITEMASK_X), ref_frames);
    ureg_MOV(shader, ureg_writemask(o_bkwd_pred, TGSI_WRITEMASK_X), bkwd_pred);
 
@@ -220,8 +217,18 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
       0.5f / r->buffer_width,
       0.5f / r->buffer_height);
 
-   for (i = 0; i < 4; i++)
-      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), scale, vmv[i], ureg_src(t_vpos));
+   ureg_MAD(shader, ureg_writemask(o_vmv[0], TGSI_WRITEMASK_XY), scale, vmv[0], ureg_src(t_vpos));
+   ureg_MAD(shader, ureg_writemask(o_vmv[2], TGSI_WRITEMASK_XY), scale, vmv[2], ureg_src(t_vpos));
+
+   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
+            vmv[0], vmv[1]);
+   ureg_MAD(shader, ureg_writemask(o_vmv[1], TGSI_WRITEMASK_XY), scale, ureg_src(t_vmv), ureg_src(t_vpos));
+
+   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
+            vmv[2], vmv[3]);
+   ureg_MAD(shader, ureg_writemask(o_vmv[3], TGSI_WRITEMASK_XY), scale, ureg_src(t_vmv), ureg_src(t_vpos));
 
    ureg_release_temporary(shader, t_vtex);
    ureg_release_temporary(shader, t_vpos);
@@ -236,12 +243,11 @@ static struct ureg_dst
 calc_field(struct ureg_program *shader)
 {
    struct ureg_dst tmp;
-   struct ureg_src line, frame_pred;
+   struct ureg_src line;
 
    tmp = ureg_DECL_temporary(shader);
 
    line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
-   frame_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FRAME_PRED, TGSI_INTERPOLATE_CONSTANT);
 
    /*
     * line.xy going from 0 to 8 in steps of 0.5
@@ -262,11 +268,6 @@ calc_field(struct ureg_program *shader)
             ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z),
             ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
 
-   ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z),
-            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
-            ureg_imm1f(shader, 0.0f),
-            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
-
    return tmp;
 }
 
-- 
cgit v1.2.3


From 593a2cf8c54bbe45efd7c142e9bc97d111e8ed88 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 10 Dec 2010 01:27:21 +0100
Subject: [g3dvl] move interlaced dct handling into vertex shader

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 46 +++++++++++-------------
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 6ebbef38163..3700de16d1b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -184,13 +184,14 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
-   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_XY), vrect, 
-      ureg_imm2f(shader, MACROBLOCK_WIDTH / 2, MACROBLOCK_HEIGHT / 2));
-   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Z), 
-      ureg_scalar(interlaced, TGSI_SWIZZLE_X));
-
    ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+
+   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_scalar(vrect, TGSI_SWIZZLE_Y));
+   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), 
+      vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
+
    ureg_IF(shader, ureg_scalar(interlaced, TGSI_SWIZZLE_X), &label);
 
       ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
@@ -200,8 +201,11 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
       ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), scale);
 
+      ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X),
+         ureg_scalar(vrect, TGSI_SWIZZLE_Y),
+         ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
+
    ureg_ENDIF(shader);
-   ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
    ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ),
             ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
@@ -250,23 +254,15 @@ calc_field(struct ureg_program *shader)
    line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
 
    /*
-    * line.xy going from 0 to 8 in steps of 0.5
-    * line.z flag that controls interlacing
+    * line.x going from 0 to 1 in steps of if not interlaced
+    * line.x going from 0 to 8 in steps of 0.5 if interlaced
+    * line.y going from 0 to 8 in steps of 0.5
     *
-    * tmp.z = fraction(line.y)
-    * tmp.z = tmp.z >= 0.5 ? 1 : 0
-    * tmp.xy = line >= 4 ? 1 : 0
-    * tmp.w = line.z ? tmp.z : tmp.y
-    * tmp.z = frame_pred ? 0.0f : tmp.z
+    * tmp.xy = fraction(line)
+    * tmp.xy = tmp.xy >= 0.5 ? 1 : 0
     */
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(line, TGSI_SWIZZLE_Y));
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line, ureg_imm2f(shader, BLOCK_WIDTH / 2, BLOCK_HEIGHT / 2));
-
-   ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W),
-            ureg_negate(ureg_scalar(line, TGSI_SWIZZLE_Z)),
-            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z),
-            ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line);
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
 
    return tmp;
 }
@@ -301,11 +297,11 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
     */
 
    ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_W)),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)),
             tc[1], tc[0]);
 
    ureg_CMP(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_W)),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)),
             eb[1], eb[0]);
 
    /* r600g is ignoring TGSI_INTERPOLATE_CONSTANT, just workaround this */
@@ -361,7 +357,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, 0.0f));
    ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), &intra_label);
       ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
-               ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
+               ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
                tc[1], tc[0]);
 
       ureg_IF(shader, ureg_scalar(ref_frames, TGSI_SWIZZLE_X), &bi_label);
@@ -386,7 +382,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
          ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]);
 
          ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
             tc[3], tc[2]);
          ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
 
-- 
cgit v1.2.3


From 9c296be7d6fc4cabed97b9d10e24c226b2ee7d08 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 10 Dec 2010 02:56:15 +0100
Subject: [g3dvl] make render targets fully configurable

---
 src/gallium/auxiliary/vl/vl_idct.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 087ac6e71bb..4afa4ffdd5a 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -45,7 +45,7 @@
 #define STAGE1_SCALE 4.0f
 #define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE)
 
-#define NR_RENDER_TARGETS 1
+#define NR_RENDER_TARGETS 4
 
 enum VS_INPUT
 {
@@ -156,10 +156,13 @@ fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
       ureg_MOV(shader, ureg_writemask(t_tc, wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_X));
    }
 
-#if NR_RENDER_TARGETS == 8
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_scalar(block, TGSI_SWIZZLE_X));
-#else
+#if NR_RENDER_TARGETS == 1
    ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
+#else
+   ureg_MUL(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), 
+      ureg_scalar(block, TGSI_SWIZZLE_X),
+      ureg_imm1f(shader, 8.0f / NR_RENDER_TARGETS));
+   ureg_FRC(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc));
 #endif
 
    ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
@@ -282,13 +285,13 @@ create_matrix_frag_shader(struct vl_idct *idct)
    
    for (i = 0; i < NR_RENDER_TARGETS; ++i) {
 
-#if NR_RENDER_TARGETS == 8
-      ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i));
-      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
-#elif NR_RENDER_TARGETS == 1
+#if NR_RENDER_TARGETS == 1
       fetch_four(shader, r, block, sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
 #else
-#error invalid number of render targets
+      ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), 
+         ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i),
+         block);
+      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
 #endif
 
       for (j = 0; j < 4; ++j) {
-- 
cgit v1.2.3


From 680f1181389103427d57704e8ae2cc453d329f1f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 10 Dec 2010 11:05:11 +0100
Subject: [g3dvl] double buffering seems to be enough

---
 src/gallium/drivers/softpipe/sp_video_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 34fa64e178e..355ad75064c 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -38,7 +38,7 @@
 #include "sp_public.h"
 #include "sp_texture.h"
 
-#define NUM_BUFFERS 8
+#define NUM_BUFFERS 2
 
 static void
 flush_buffer(struct sp_mpeg12_context *ctx)
-- 
cgit v1.2.3


From 4a8420513d653cd2fccf93a51315120a1a5d0fcc Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 10 Dec 2010 11:31:17 +0100
Subject: [g3dvl] move idct stage 1 scaling into matrix

---
 src/gallium/auxiliary/vl/vl_idct.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 4afa4ffdd5a..efe2b7f2204 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -43,7 +43,7 @@
 #define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
 #define STAGE1_SCALE 4.0f
-#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE)
+#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE / STAGE1_SCALE)
 
 #define NR_RENDER_TARGETS 4
 
@@ -273,14 +273,14 @@ create_matrix_frag_shader(struct vl_idct *idct)
    for (i = 0; i < NR_RENDER_TARGETS; ++i)
        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
 
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
    for (i = 0; i < 4; ++i) {
-      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], block, false, false, idct->buffer_width / 4);
-      ureg_MUL(shader, l[i][0], ureg_src(l[i][0]), ureg_imm1f(shader, STAGE1_SCALE));
-      ureg_MUL(shader, l[i][1], ureg_src(l[i][1]), ureg_imm1f(shader, STAGE1_SCALE));
-      if(i != 3)
+      if(i == 0)
+         ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
+      else
          ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 
             ureg_src(t_tc), ureg_imm1f(shader, 1.0f / idct->buffer_height));
+
+      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], block, false, false, idct->buffer_width / 4);
    }
    
    for (i = 0; i < NR_RENDER_TARGETS; ++i) {
@@ -530,7 +530,8 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    f = pipe->transfer_map(pipe, buf_transfer);
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
-         f[i * pitch + j] = const_matrix[j][i]; // transpose
+         // transpose and scale
+         f[i * pitch + j] = const_matrix[j][i] * STAGE1_SCALE;
 
    pipe->transfer_unmap(pipe, buf_transfer);
    pipe->transfer_destroy(pipe, buf_transfer);
-- 
cgit v1.2.3


From ab130400cf91ab471e265e58193c95f04c7aeeda Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 10 Dec 2010 12:05:30 +0100
Subject: [g3dvl] move z-coord generation for multiple render targets into
 vertex shader

---
 src/gallium/auxiliary/vl/vl_idct.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index efe2b7f2204..f0063a453d0 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -119,7 +119,17 @@ create_vert_shader(struct vl_idct *idct)
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
    ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
+   ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
+
    ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+#if NR_RENDER_TARGETS == 1
+   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
+#else
+   ureg_MUL(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_Z), 
+      ureg_scalar(vrect, TGSI_SWIZZLE_X),
+      ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));
+#endif
+
    ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
 
    ureg_release_temporary(shader, t_vpos);
@@ -132,8 +142,8 @@ create_vert_shader(struct vl_idct *idct)
 static void
 fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
            struct ureg_src tc, struct ureg_src sampler,
-           struct ureg_src start, struct ureg_src block,
-           bool right_side, bool transposed, float size)
+           struct ureg_src start, bool right_side,
+           bool transposed, float size)
 {
    struct ureg_dst t_tc;
    unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
@@ -155,15 +165,7 @@ fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
       ureg_MOV(shader, ureg_writemask(t_tc, wm_start), ureg_scalar(start, TGSI_SWIZZLE_Y));
       ureg_MOV(shader, ureg_writemask(t_tc, wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_X));
    }
-
-#if NR_RENDER_TARGETS == 1
-   ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
-#else
-   ureg_MUL(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), 
-      ureg_scalar(block, TGSI_SWIZZLE_X),
-      ureg_imm1f(shader, 8.0f / NR_RENDER_TARGETS));
-   ureg_FRC(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc));
-#endif
+   ureg_FRC(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), tc);
 
    ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
    ureg_ADD(shader, ureg_writemask(t_tc, wm_start), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / size));
@@ -221,8 +223,8 @@ create_transpose_frag_shader(struct vl_idct *idct)
    start[0] = ureg_imm1f(shader, 0.0f);
    start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
-   fetch_four(shader, l, block, sampler[0], start[0], block, false, false, BLOCK_WIDTH / 4);
-   fetch_four(shader, r, tex, sampler[1], start[1], block, true, false, idct->buffer_height / 4);
+   fetch_four(shader, l, block, sampler[0], start[0], false, false, BLOCK_WIDTH / 4);
+   fetch_four(shader, r, tex, sampler[1], start[1], true, false, idct->buffer_height / 4);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -280,18 +282,18 @@ create_matrix_frag_shader(struct vl_idct *idct)
          ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 
             ureg_src(t_tc), ureg_imm1f(shader, 1.0f / idct->buffer_height));
 
-      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], block, false, false, idct->buffer_width / 4);
+      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], false, false, idct->buffer_width / 4);
    }
    
    for (i = 0; i < NR_RENDER_TARGETS; ++i) {
 
 #if NR_RENDER_TARGETS == 1
-      fetch_four(shader, r, block, sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
+      fetch_four(shader, r, block, sampler[1], start[1], true, true, BLOCK_WIDTH / 4);
 #else
       ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), 
          ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i),
          block);
-      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], block, true, true, BLOCK_WIDTH / 4);
+      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, true, BLOCK_WIDTH / 4);
 #endif
 
       for (j = 0; j < 4; ++j) {
-- 
cgit v1.2.3


From dbe6454aa32b448d1b76ce2ac4c44e11121e7218 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 11 Dec 2010 14:00:59 +0100
Subject: [g3dvl] move mv into vertex stream

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 82 ++++++++----------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h | 10 ++-
 2 files changed, 32 insertions(+), 60 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 3700de16d1b..0ebb9c48c24 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -43,7 +43,7 @@
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
 
-struct vertex_stream_0
+struct vertex_stream
 {
    struct vertex2f pos;
    struct {
@@ -55,6 +55,7 @@ struct vertex_stream_0
    float frame_pred;
    float ref_frames;
    float bkwd_pred;
+   struct vertex2f mv[4];
 };
 
 enum VS_INPUT
@@ -584,13 +585,11 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    /* forward=0.0f backward=1.0f */
    vertex_elems[VS_I_BKWD_PRED].src_format = PIPE_FORMAT_R32_FLOAT;
 
-   r->pos_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 9, 1);
-
-   for (i = 0; i < 4; ++i) {
+   for (i = 0; i < 4; ++i)
       /* motion vector 0..4 element */
       vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R32G32_FLOAT;
-      r->mv_stride[i] = vl_vb_element_helper(&vertex_elems[VS_I_MV0 + i], 1, i + 2);
-   }
+
+   r->vertex_stream_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 13, 1);
 
    r->vertex_elems_state = r->pipe->create_vertex_elements_state(
       r->pipe, NUM_VS_INPUTS, vertex_elems);
@@ -734,49 +733,45 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
              struct vl_mpeg12_mc_buffer *buffer,
              struct pipe_mpeg12_macroblock *mb)
 {
-   struct vertex2f mv[4];
-   struct vertex_stream_0 info;
+   struct vertex_stream stream;
 
    unsigned i, j;
 
    assert(r);
    assert(mb);
 
-   info.pos.x = mb->mbx;
-   info.pos.y = mb->mby;
+   stream.pos.x = mb->mbx;
+   stream.pos.y = mb->mby;
    for ( i = 0; i < 2; ++i) {
       for ( j = 0; j < 2; ++j) {
-         info.eb[i][j].y = empty_block(r->chroma_format, mb->cbp, 0, j, i);
-         info.eb[i][j].cr = empty_block(r->chroma_format, mb->cbp, 1, j, i);
-         info.eb[i][j].cb = empty_block(r->chroma_format, mb->cbp, 2, j, i);         
+         stream.eb[i][j].y = empty_block(r->chroma_format, mb->cbp, 0, j, i);
+         stream.eb[i][j].cr = empty_block(r->chroma_format, mb->cbp, 1, j, i);
+         stream.eb[i][j].cb = empty_block(r->chroma_format, mb->cbp, 2, j, i);         
       }
    }
-   info.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
-   info.frame_pred = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? 1.0f : 0.0f;
-   info.bkwd_pred = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD ? 1.0f : 0.0f;
+   stream.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
+   stream.frame_pred = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? 1.0f : 0.0f;
+   stream.bkwd_pred = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD ? 1.0f : 0.0f;
    switch (mb->mb_type) {
       case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
-         info.ref_frames = -1.0f;
+         stream.ref_frames = -1.0f;
          break;
 
       case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
       case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-         info.ref_frames = 1.0f;
+         stream.ref_frames = 1.0f;
          break;
         
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-         info.ref_frames = 0.0f;
+         stream.ref_frames = 0.0f;
          break;
 
       default:
          assert(0);
    }
 
-   vl_vb_add_block(&buffer->pos, (float*)&info);
-
-   get_motion_vectors(mb, mv);
-   for ( j = 0; j < 4; ++j )
-      vl_vb_add_block(&buffer->mv[j], (float*)&mv[j]);
+   get_motion_vectors(mb, stream.mv);
+   vl_vb_add_block(&buffer->vertex_stream, (float*)&stream);
 }
 
 static void
@@ -961,17 +956,10 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    buffer->vertex_bufs.individual.quad.buffer_offset = renderer->quad.buffer_offset;
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer);
 
-   buffer->vertex_bufs.individual.pos = vl_vb_init(
-      &buffer->pos, renderer->pipe, renderer->macroblocks_per_batch, 
-      sizeof(struct vertex_stream_0) / sizeof(float),
-      renderer->pos_stride);
-
-   for (i = 0; i < 4; ++i) {
-      buffer->vertex_bufs.individual.mv[i] = vl_vb_init(
-         &buffer->mv[i], renderer->pipe, renderer->macroblocks_per_batch,
-         sizeof(struct vertex2f) / sizeof(float),
-         renderer->mv_stride[i]);
-   }
+   buffer->vertex_bufs.individual.stream = vl_vb_init(
+      &buffer->vertex_stream, renderer->pipe, renderer->macroblocks_per_batch, 
+      sizeof(struct vertex_stream) / sizeof(float),
+      renderer->vertex_stream_stride);
 
    return true;
 }
@@ -990,9 +978,7 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    }
 
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
-   vl_vb_cleanup(&buffer->pos);
-   for (i = 0; i < 4; ++i)
-      vl_vb_cleanup(&buffer->mv[i]);
+   vl_vb_cleanup(&buffer->vertex_stream);
 
    vl_idct_cleanup_buffer(&renderer->idct_luma, &buffer->idct_y);
    vl_idct_cleanup_buffer(&renderer->idct_chroma, &buffer->idct_cb);
@@ -1006,17 +992,13 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
 void
 vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
 {
-   unsigned i;
-
    assert(renderer && buffer);
 
    vl_idct_map_buffers(&renderer->idct_luma, &buffer->idct_y);
    vl_idct_map_buffers(&renderer->idct_chroma, &buffer->idct_cr);
    vl_idct_map_buffers(&renderer->idct_chroma, &buffer->idct_cb);
 
-   vl_vb_map(&buffer->pos, renderer->pipe);
-   for(i = 0; i < 4; ++i)
-      vl_vb_map(&buffer->mv[i], renderer->pipe);
+   vl_vb_map(&buffer->vertex_stream, renderer->pipe);
 }
 
 void
@@ -1071,24 +1053,18 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
 void
 vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
 {
-   unsigned i;
-
    assert(renderer && buffer);
 
    vl_idct_unmap_buffers(&renderer->idct_luma, &buffer->idct_y);
    vl_idct_unmap_buffers(&renderer->idct_chroma, &buffer->idct_cr);
    vl_idct_unmap_buffers(&renderer->idct_chroma, &buffer->idct_cb);
 
-   vl_vb_unmap(&buffer->pos, renderer->pipe);
-   for(i = 0; i < 4; ++i)
-      vl_vb_unmap(&buffer->mv[i], renderer->pipe);
+   vl_vb_unmap(&buffer->vertex_stream, renderer->pipe);
 }
 
 void
 vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
 {
-   unsigned i;
-
    assert(renderer && buffer);
    assert(buffer->num_macroblocks <= renderer->macroblocks_per_batch);
 
@@ -1099,15 +1075,13 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    vl_idct_flush(&renderer->idct_chroma, &buffer->idct_cr);
    vl_idct_flush(&renderer->idct_chroma, &buffer->idct_cb);
 
-   vl_vb_restart(&buffer->pos);
-   for(i = 0; i < 4; ++i)
-      vl_vb_restart(&buffer->mv[i]);
+   vl_vb_restart(&buffer->vertex_stream);
 
    renderer->fb_state.cbufs[0] = buffer->surface;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
-   renderer->pipe->set_vertex_buffers(renderer->pipe, 6, buffer->vertex_bufs.all);
+   renderer->pipe->set_vertex_buffers(renderer->pipe, 2, buffer->vertex_bufs.all);
    renderer->pipe->bind_vertex_elements_state(renderer->pipe, renderer->vertex_elems_state);
 
    if (buffer->past) {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 157cbee1362..79e872c2bee 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -55,8 +55,7 @@ struct vl_mpeg12_mc_renderer
    enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode;
    unsigned macroblocks_per_batch;
 
-   unsigned pos_stride;
-   unsigned mv_stride[4];
+   unsigned vertex_stream_stride;
 
    struct pipe_viewport_state viewport;
    struct pipe_framebuffer_state fb_state;
@@ -83,8 +82,7 @@ struct vl_mpeg12_mc_buffer
 {
    struct vl_idct_buffer idct_y, idct_cb, idct_cr;
 
-   struct vl_vertex_buffer pos;
-   struct vl_vertex_buffer mv[4];
+   struct vl_vertex_buffer vertex_stream;
 
    union
    {
@@ -100,9 +98,9 @@ struct vl_mpeg12_mc_buffer
 
    union
    {
-      struct pipe_vertex_buffer all[6];
+      struct pipe_vertex_buffer all[2];
       struct {
-         struct pipe_vertex_buffer quad, pos, mv[4];
+         struct pipe_vertex_buffer quad, stream;
       } individual;
    } vertex_bufs;
 
-- 
cgit v1.2.3


From ad643bfc12de13f78b86de15a9d44a91ffa93ce9 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 11 Dec 2010 14:34:01 +0100
Subject: [g3dvl] remove "Nouveau can't writemask tex dst regs" workaround

This now works with r600g, but will probably break Nouveau.
It's just way faster on r600 hardware, so let's fix Nouveau.
---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 0ebb9c48c24..973a746d528 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -255,7 +255,7 @@ calc_field(struct ureg_program *shader)
    line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
 
    /*
-    * line.x going from 0 to 1 in steps of if not interlaced
+    * line.x going from 0 to 1 if not interlaced
     * line.x going from 0 to 8 in steps of 0.5 if interlaced
     * line.y going from 0 to 8 in steps of 0.5
     *
@@ -312,15 +312,13 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
    for (i = 0; i < 3; ++i) {
       ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), TGSI_SWIZZLE_X + i), &label);
 
-         /* Nouveau and r600g can't writemask tex dst regs (yet?), do in two steps */
+         /* Nouveau can't writemask tex dst regs (yet?), so this won't work anymore on nvidia hardware */
          if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
-            ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]);
+            ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]);
          } else {
-            ureg_TEX(shader, tmp, TGSI_TEXTURE_3D, tc[2], sampler[i]);
+            ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc[2], sampler[i]);
          }
 
-         ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
-
       ureg_ENDIF(shader);
    }
 
@@ -947,6 +945,10 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
       u_sampler_view_default_template(&sampler_view,
                                       buffer->textures.all[i],
                                       buffer->textures.all[i]->format);
+      sampler_view.swizzle_r = i == 0 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
+      sampler_view.swizzle_g = i == 1 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
+      sampler_view.swizzle_b = i == 2 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
+      sampler_view.swizzle_a = PIPE_SWIZZLE_ONE;
       buffer->sampler_views.all[i] = renderer->pipe->create_sampler_view(
          renderer->pipe, buffer->textures.all[i], &sampler_view);
    }
-- 
cgit v1.2.3


From d5295552f532a1e03b06ff497529f6c4b5ff84c6 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 12 Dec 2010 15:13:39 +0100
Subject: r600g: Why all this fiddling with tgsi_helper_copy?

tgsi_helper_copy is used on several occasions to copy a temporary result
into the real destination register to emulate writemasks for OP3 and
reduction operations. According to R600 ISA that's unnecessary.

This patch fixes this use for MAD, CMP and DP4.
---
 src/gallium/drivers/r600/r600_shader.c | 63 ++++++++++++++++++++++------------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index d6455023a3a..64c8b82c112 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1587,6 +1587,13 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
 	struct r600_bc_alu_src r600_src[3];
 	struct r600_bc_alu alu;
 	int i, j, r;
+	int lasti = 0;
+
+	for (i = 0; i < 4; i++) {
+		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+			lasti = i;
+		}
+	}
 
 	r = tgsi_split_constant(ctx, r600_src);
 	if (r)
@@ -1594,26 +1601,32 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
 	r = tgsi_split_literal_constant(ctx, r600_src);
 	if (r)
 		return r;
-	/* do it in 2 step as op3 doesn't support writemask */
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < lasti + 1; i++) {
+		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+			continue;
+
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 		alu.inst = ctx->inst_info->r600_opcode;
 		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
 			alu.src[j] = r600_src[j];
 			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
 		}
-		alu.dst.sel = ctx->temp_reg;
+
+		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		if (r)
+			return r;
+
 		alu.dst.chan = i;
 		alu.dst.write = 1;
 		alu.is_op3 = 1;
-		if (i == 3) {
+		if (i == lasti) {
 			alu.last = 1;
 		}
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
-	return tgsi_helper_copy(ctx, inst);
+	return 0;
 }
 
 static int tgsi_dp(struct r600_shader_ctx *ctx)
@@ -1636,7 +1649,13 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
 			alu.src[j] = r600_src[j];
 			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
 		}
-		alu.dst.sel = ctx->temp_reg;
+		if(inst->Dst[0].Register.WriteMask & (1 << i)) {
+			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+			if (r)
+				return r;
+		} else {
+			alu.dst.sel = ctx->temp_reg;
+		}
 		alu.dst.chan = i;
 		alu.dst.write = 1;
 		/* handle some special cases */
@@ -1670,7 +1689,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 	}
-	return tgsi_helper_copy(ctx, inst);
+	return 0;
 }
 
 static int tgsi_tex(struct r600_shader_ctx *ctx)
@@ -1851,6 +1870,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 	}
 
 	if (src_not_temp) {
+                assert(0);
 		for (i = 0; i < 4; i++) {
 			memset(&alu, 0, sizeof(struct r600_bc_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
@@ -2000,8 +2020,14 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	struct r600_bc_alu_src r600_src[3];
 	struct r600_bc_alu alu;
-	int use_temp = 0;
 	int i, r;
+	int lasti = 0;
+
+	for (i = 0; i < 4; i++) {
+		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+			lasti = i;
+		}
+	}
 
 	r = tgsi_split_constant(ctx, r600_src);
 	if (r)
@@ -2010,10 +2036,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 	if (r)
 		return r;
 
-	if (inst->Dst[0].Register.WriteMask != 0xf)
-		use_temp = 1;
+	for (i = 0; i < lasti + 1; i++) {
+		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+			continue;
 
-	for (i = 0; i < 4; i++) {
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE);
 		alu.src[0] = r600_src[0];
@@ -2025,24 +2051,19 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 		alu.src[2] = r600_src[1];
 		alu.src[2].chan = tgsi_chan(&inst->Src[1], i);
 
-		if (use_temp)
-			alu.dst.sel = ctx->temp_reg;
-		else {
-			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-			if (r)
-				return r;
-		}
+		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		if (r)
+			return r;
+
 		alu.dst.chan = i;
 		alu.dst.write = 1;
 		alu.is_op3 = 1;
-		if (i == 3)
+		if (i == lasti)
 			alu.last = 1;
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
-	if (use_temp)
-		return tgsi_helper_copy(ctx, inst);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 7bc9ab1181ff3dd8db67f6b020857820c49e0cbb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 12 Dec 2010 15:37:54 +0100
Subject: r600g: texture instructions also work fine with TGSI_FILE_INPUT

---
 src/gallium/drivers/r600/r600_shader.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 64c8b82c112..6cf23535515 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1700,7 +1700,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 	unsigned src_gpr;
 	int r, i;
 	int opcode;
-	boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
+	boolean src_not_temp =
+		inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
+		inst->Src[0].Register.File != TGSI_FILE_INPUT;
 	uint32_t lit_vals[4];
 
 	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
@@ -1870,7 +1872,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 	}
 
 	if (src_not_temp) {
-                assert(0);
 		for (i = 0; i < 4; i++) {
 			memset(&alu, 0, sizeof(struct r600_bc_alu));
 			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
-- 
cgit v1.2.3


From ebab090ed93270b40475151e60dbc7f2b72f1a61 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 12 Dec 2010 21:40:41 +0100
Subject: [g3dvl] separate texture addr generation from fetching

---
 src/gallium/auxiliary/vl/vl_idct.c | 137 ++++++++++++++++++-------------------
 1 file changed, 68 insertions(+), 69 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index ae80dc0a274..1a29b91cf51 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -140,18 +140,15 @@ create_vert_shader(struct vl_idct *idct)
 }
 
 static void
-fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
-           struct ureg_src tc, struct ureg_src sampler,
-           struct ureg_src start, bool right_side,
-           bool transposed, float size)
+calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
+          struct ureg_src tc, struct ureg_src start, bool right_side,
+          bool transposed, float size)
 {
-   struct ureg_dst t_tc;
    unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
    unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
 
-   t_tc = ureg_DECL_temporary(shader);
-   m[0] = ureg_DECL_temporary(shader);
-   m[1] = ureg_DECL_temporary(shader);
+   addr[0] = ureg_DECL_temporary(shader);
+   addr[1] = ureg_DECL_temporary(shader);
 
    /*
     * t_tc.x = right_side ? start.x : tc.x
@@ -159,44 +156,58 @@ fetch_four(struct ureg_program *shader, struct ureg_dst m[2],
     * m[0..1] = tex(t_tc++, sampler)
     */
    if(!right_side) {
-      ureg_MOV(shader, ureg_writemask(t_tc, wm_start), ureg_scalar(start, TGSI_SWIZZLE_X));
-      ureg_MOV(shader, ureg_writemask(t_tc, wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_Y));
+      ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, TGSI_SWIZZLE_X));
+      ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_Y));
    } else {
-      ureg_MOV(shader, ureg_writemask(t_tc, wm_start), ureg_scalar(start, TGSI_SWIZZLE_Y));
-      ureg_MOV(shader, ureg_writemask(t_tc, wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_X));
+      ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, TGSI_SWIZZLE_Y));
+      ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_X));
    }
-   ureg_FRC(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), tc);
+   ureg_FRC(shader, ureg_writemask(addr[0], TGSI_WRITEMASK_Z), tc);
 
-   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
-   ureg_ADD(shader, ureg_writemask(t_tc, wm_start), ureg_src(t_tc), ureg_imm1f(shader, 1.0f / size));
-   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
+   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_src(addr[0]), ureg_imm1f(shader, 1.0f / size));
+   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc | TGSI_WRITEMASK_Z), ureg_src(addr[0]));
+}
 
-   ureg_release_temporary(shader, t_tc);
+static void
+increment_addr(struct ureg_program *shader, struct ureg_dst addr[2],
+               bool right_side, bool transposed, float size)
+{
+   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
+
+   ureg_ADD(shader, ureg_writemask(addr[0], wm_tc),
+      ureg_src(addr[0]), ureg_imm1f(shader, 1.0f / size));
+   ureg_ADD(shader, ureg_writemask(addr[1], wm_tc),
+      ureg_src(addr[1]), ureg_imm1f(shader, 1.0f / size));
+}
+
+static void
+fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_dst addr[2], struct ureg_src sampler)
+{
+   m[0] = ureg_DECL_temporary(shader);
+   m[1] = ureg_DECL_temporary(shader);
+
+   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, ureg_src(addr[0]), sampler);
+   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, ureg_src(addr[1]), sampler);
 }
 
 static void
 matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
 {
-   struct ureg_dst tmp[2];
-   unsigned i;
+   struct ureg_dst tmp;
 
-   for(i = 0; i < 2; ++i) {
-      tmp[i] = ureg_DECL_temporary(shader);
-   }
+   tmp = ureg_DECL_temporary(shader);
 
    /*
-    * tmp[0..1] = dot4(m[0][0..1], m[1][0..1])
-    * dst = tmp[0] + tmp[1]
+    * tmp.xy = dot4(m[0][0..1], m[1][0..1])
+    * dst = tmp.x + tmp.y
     */
-   ureg_DP4(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
-   ureg_DP4(shader, ureg_writemask(tmp[1], TGSI_WRITEMASK_X), ureg_src(l[1]), ureg_src(r[1]));
+   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
+   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
    ureg_ADD(shader, dst,
-      ureg_scalar(ureg_src(tmp[0]), TGSI_SWIZZLE_X),
-      ureg_scalar(ureg_src(tmp[1]), TGSI_SWIZZLE_X));
+      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
+      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
 
-   for(i = 0; i < 2; ++i) {
-      ureg_release_temporary(shader, tmp[i]);
-   }
+   ureg_release_temporary(shader, tmp);
 }
 
 static void *
@@ -204,10 +215,9 @@ create_transpose_frag_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
 
-   struct ureg_src block, tex, sampler[2];
-   struct ureg_src start[2];
+   struct ureg_src block, tex, start;
 
-   struct ureg_dst l[2], r[2];
+   struct ureg_dst laddr[2], l[2], r[2], raddr[2];
    struct ureg_dst tmp, fragment;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
@@ -216,15 +226,12 @@ create_transpose_frag_shader(struct vl_idct *idct)
 
    block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
    tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_CONSTANT);
+   start = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
-   sampler[0] = ureg_DECL_sampler(shader, 0);
-   sampler[1] = ureg_DECL_sampler(shader, 1);
-
-   start[0] = ureg_imm1f(shader, 0.0f);
-   start[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
-
-   fetch_four(shader, l, block, sampler[0], start[0], false, false, BLOCK_WIDTH / 4);
-   fetch_four(shader, r, tex, sampler[1], start[1], true, false, idct->buffer_height / 4);
+   calc_addr(shader, laddr, block, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
+   calc_addr(shader, raddr, tex, start, true, false, idct->buffer_height / 4);
+   fetch_four(shader, l, laddr, ureg_DECL_sampler(shader, 0));
+   fetch_four(shader, r, raddr, ureg_DECL_sampler(shader, 1));
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -233,8 +240,12 @@ create_transpose_frag_shader(struct vl_idct *idct)
    ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
 
    ureg_release_temporary(shader, tmp);
+   ureg_release_temporary(shader, laddr[0]);
+   ureg_release_temporary(shader, laddr[1]);
    ureg_release_temporary(shader, l[0]);
    ureg_release_temporary(shader, l[1]);
+   ureg_release_temporary(shader, raddr[0]);
+   ureg_release_temporary(shader, raddr[1]);
    ureg_release_temporary(shader, r[0]);
    ureg_release_temporary(shader, r[1]);
 
@@ -248,11 +259,10 @@ create_matrix_frag_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
 
-   struct ureg_src tex, block, sampler[2];
-   struct ureg_src start[2];
+   struct ureg_src tex, block, start;
 
    struct ureg_dst l[4][2], r[2];
-   struct ureg_dst t_tc, tmp, fragment[NR_RENDER_TARGETS];
+   struct ureg_dst addr[2], fragment[NR_RENDER_TARGETS];
 
    unsigned i, j;
 
@@ -260,41 +270,31 @@ create_matrix_frag_shader(struct vl_idct *idct)
    if (!shader)
       return NULL;
 
-   t_tc = ureg_DECL_temporary(shader);
-   tmp = ureg_DECL_temporary(shader);
-
    tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
    block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
-
-   sampler[0] = ureg_DECL_sampler(shader, 1);
-   sampler[1] = ureg_DECL_sampler(shader, 0);
-
-   start[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
-   start[1] = ureg_imm1f(shader, 0.0f);
+   start = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
 
    for (i = 0; i < NR_RENDER_TARGETS; ++i)
        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
 
    for (i = 0; i < 4; ++i) {
       if(i == 0)
-         ureg_MOV(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), tex);
+         calc_addr(shader, addr, tex, start, false, false, idct->buffer_width / 4);
       else
-         ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Y), 
-            ureg_src(t_tc), ureg_imm1f(shader, 1.0f / idct->buffer_height));
+         increment_addr(shader, addr, false, false, idct->buffer_height);
 
-      fetch_four(shader, l[i], ureg_src(t_tc), sampler[0], start[0], false, false, idct->buffer_width / 4);
+      fetch_four(shader, l[i], addr, ureg_DECL_sampler(shader, 1));
    }
+   ureg_release_temporary(shader, addr[0]);
+   ureg_release_temporary(shader, addr[1]);
    
    for (i = 0; i < NR_RENDER_TARGETS; ++i) {
+      if (i == 0)
+         calc_addr(shader, addr, block, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
+      else
+         increment_addr(shader, addr, true, true, BLOCK_HEIGHT);
 
-#if NR_RENDER_TARGETS == 1
-      fetch_four(shader, r, block, sampler[1], start[1], true, true, BLOCK_WIDTH / 4);
-#else
-      ureg_ADD(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_X), 
-         ureg_imm1f(shader, 1.0f / BLOCK_WIDTH * i),
-         block);
-      fetch_four(shader, r, ureg_src(t_tc), sampler[1], start[1], true, true, BLOCK_WIDTH / 4);
-#endif
+      fetch_four(shader, r, addr, ureg_DECL_sampler(shader, 0));
 
       for (j = 0; j < 4; ++j) {
          matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
@@ -302,9 +302,8 @@ create_matrix_frag_shader(struct vl_idct *idct)
       ureg_release_temporary(shader, r[0]);
       ureg_release_temporary(shader, r[1]);
    }
-
-   ureg_release_temporary(shader, t_tc);
-   ureg_release_temporary(shader, tmp);
+   ureg_release_temporary(shader, addr[0]);
+   ureg_release_temporary(shader, addr[1]);
 
    for (i = 0; i < 4; ++i) {
       ureg_release_temporary(shader, l[i][0]);
-- 
cgit v1.2.3


From be4de05c1093db27b3fca12b782055ab8a1eba13 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 12 Dec 2010 22:55:23 +0100
Subject: [g3dvl] move idct texture addr generation into vertex shader

---
 src/gallium/auxiliary/vl/vl_idct.c | 272 +++++++++++++++++++------------------
 src/gallium/auxiliary/vl/vl_idct.h |   2 +-
 2 files changed, 143 insertions(+), 131 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 1a29b91cf51..5a32573dd66 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -58,9 +58,10 @@ enum VS_INPUT
 enum VS_OUTPUT
 {
    VS_O_VPOS,
-   VS_O_BLOCK,
-   VS_O_TEX,
-   VS_O_START
+   VS_O_L_ADDR0,
+   VS_O_L_ADDR1,
+   VS_O_R_ADDR0,
+   VS_O_R_ADDR1
 };
 
 static const float const_matrix[8][8] = {
@@ -74,28 +75,58 @@ static const float const_matrix[8][8] = {
    {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f, -0.0975458f }
 };
 
+static void
+calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
+          struct ureg_src tc, struct ureg_src start, bool right_side,
+          bool transposed, float size)
+{
+   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
+   unsigned sw_start = right_side ? TGSI_SWIZZLE_Y : TGSI_SWIZZLE_X;
+
+   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
+   unsigned sw_tc = right_side ? TGSI_SWIZZLE_X : TGSI_SWIZZLE_Y;
+
+   /*
+    * addr[0..1].(start) = right_side ? start.x : tc.x
+    * addr[0..1].(tc) = right_side ? tc.y : start.y
+    * addr[0..1].z = tc.z
+    * addr[1].(start) += 1.0f / scale
+    */
+   ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
+   ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
+   ureg_MOV(shader, ureg_writemask(addr[0], TGSI_WRITEMASK_Z), tc);
+
+   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
+   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
+   ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc);
+}
+
 static void *
-create_vert_shader(struct vl_idct *idct)
+create_vert_shader(struct vl_idct *idct, bool matrix_stage)
 {
    struct ureg_program *shader;
    struct ureg_src scale;
    struct ureg_src vrect, vpos;
-   struct ureg_dst t_vpos;
-   struct ureg_dst o_vpos, o_block, o_tex, o_start;
+   struct ureg_dst t_tex, t_start;
+   struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
       return NULL;
 
-   t_vpos = ureg_DECL_temporary(shader);
+   t_tex = ureg_DECL_temporary(shader);
+   t_start = ureg_DECL_temporary(shader);
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
-   o_block = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK);
-   o_tex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX);
-   o_start = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_START);
+
+   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
+   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
+
+   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0);
+   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1);
 
    /*
     * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
@@ -104,76 +135,48 @@ create_vert_shader(struct vl_idct *idct)
     * o_vpos.xy = t_vpos * scale
     * o_vpos.zw = vpos
     *
-    * o_block = vrect
-    * o_tex = t_pos
-    * o_start = vpos * scale
+    * o_l_addr = calc_addr(...)
+    * o_r_addr = calc_addr(...)
     *
     */
    scale = ureg_imm2f(shader,
       (float)BLOCK_WIDTH / idct->buffer_width,
       (float)BLOCK_HEIGHT / idct->buffer_height);
 
-   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
-   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
-   ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_XY), vrect);
-   ureg_MOV(shader, ureg_writemask(o_block, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
-
-   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-#if NR_RENDER_TARGETS == 1
-   ureg_MOV(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_Z), ureg_imm1f(shader, 0.0f));
-#else
-   ureg_MUL(shader, ureg_writemask(o_tex, TGSI_WRITEMASK_Z), 
+   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
       ureg_scalar(vrect, TGSI_SWIZZLE_X),
       ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));
-#endif
 
-   ureg_MUL(shader, ureg_writemask(o_start, TGSI_WRITEMASK_XY), vpos, scale);
+   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
 
-   ureg_release_temporary(shader, t_vpos);
+   if(matrix_stage) {
+      calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
+      calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
+   } else {
+      calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
+      calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
+   }
+
+   ureg_release_temporary(shader, t_tex);
+   ureg_release_temporary(shader, t_start);
 
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, idct->pipe);
 }
 
-static void
-calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
-          struct ureg_src tc, struct ureg_src start, bool right_side,
-          bool transposed, float size)
-{
-   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
-   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
-
-   addr[0] = ureg_DECL_temporary(shader);
-   addr[1] = ureg_DECL_temporary(shader);
-
-   /*
-    * t_tc.x = right_side ? start.x : tc.x
-    * t_tc.y = right_side ? tc.y : start.y
-    * m[0..1] = tex(t_tc++, sampler)
-    */
-   if(!right_side) {
-      ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, TGSI_SWIZZLE_X));
-      ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_Y));
-   } else {
-      ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, TGSI_SWIZZLE_Y));
-      ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, TGSI_SWIZZLE_X));
-   }
-   ureg_FRC(shader, ureg_writemask(addr[0], TGSI_WRITEMASK_Z), tc);
-
-   ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_src(addr[0]), ureg_imm1f(shader, 1.0f / size));
-   ureg_MOV(shader, ureg_writemask(addr[1], wm_tc | TGSI_WRITEMASK_Z), ureg_src(addr[0]));
-}
-
 static void
 increment_addr(struct ureg_program *shader, struct ureg_dst addr[2],
                bool right_side, bool transposed, float size)
 {
    unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
 
+   /* addr[0..1]++ */
    ureg_ADD(shader, ureg_writemask(addr[0], wm_tc),
       ureg_src(addr[0]), ureg_imm1f(shader, 1.0f / size));
    ureg_ADD(shader, ureg_writemask(addr[1], wm_tc),
@@ -181,13 +184,13 @@ increment_addr(struct ureg_program *shader, struct ureg_dst addr[2],
 }
 
 static void
-fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_dst addr[2], struct ureg_src sampler)
+fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler)
 {
    m[0] = ureg_DECL_temporary(shader);
    m[1] = ureg_DECL_temporary(shader);
 
-   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, ureg_src(addr[0]), sampler);
-   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, ureg_src(addr[1]), sampler);
+   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler);
+   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler);
 }
 
 static void
@@ -210,59 +213,15 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2
    ureg_release_temporary(shader, tmp);
 }
 
-static void *
-create_transpose_frag_shader(struct vl_idct *idct)
-{
-   struct ureg_program *shader;
-
-   struct ureg_src block, tex, start;
-
-   struct ureg_dst laddr[2], l[2], r[2], raddr[2];
-   struct ureg_dst tmp, fragment;
-
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
-
-   block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
-   tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_CONSTANT);
-   start = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
-
-   calc_addr(shader, laddr, block, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
-   calc_addr(shader, raddr, tex, start, true, false, idct->buffer_height / 4);
-   fetch_four(shader, l, laddr, ureg_DECL_sampler(shader, 0));
-   fetch_four(shader, r, raddr, ureg_DECL_sampler(shader, 1));
-
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
-   tmp = ureg_DECL_temporary(shader);
-   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), l, r);
-   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
-
-   ureg_release_temporary(shader, tmp);
-   ureg_release_temporary(shader, laddr[0]);
-   ureg_release_temporary(shader, laddr[1]);
-   ureg_release_temporary(shader, l[0]);
-   ureg_release_temporary(shader, l[1]);
-   ureg_release_temporary(shader, raddr[0]);
-   ureg_release_temporary(shader, raddr[1]);
-   ureg_release_temporary(shader, r[0]);
-   ureg_release_temporary(shader, r[1]);
-
-   ureg_END(shader);
-
-   return ureg_create_shader_and_destroy(shader, idct->pipe);
-}
-
 static void *
 create_matrix_frag_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
 
-   struct ureg_src tex, block, start;
+   struct ureg_src l_addr[2], r_addr[2], saddr[2];
 
-   struct ureg_dst l[4][2], r[2];
-   struct ureg_dst addr[2], fragment[NR_RENDER_TARGETS];
+   struct ureg_dst addr[2], l[4][2], r[2];
+   struct ureg_dst fragment[NR_RENDER_TARGETS];
 
    unsigned i, j;
 
@@ -270,31 +229,39 @@ create_matrix_frag_shader(struct vl_idct *idct)
    if (!shader)
       return NULL;
 
-   tex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
-   block = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
-   start = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_START, TGSI_INTERPOLATE_CONSTANT);
+   addr[0] = ureg_DECL_temporary(shader);
+   addr[1] = ureg_DECL_temporary(shader);
+
+   saddr[0] = ureg_src(addr[0]);
+   saddr[1] = ureg_src(addr[1]);
+
+   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
 
    for (i = 0; i < NR_RENDER_TARGETS; ++i)
        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
 
    for (i = 0; i < 4; ++i) {
-      if(i == 0)
-         calc_addr(shader, addr, tex, start, false, false, idct->buffer_width / 4);
-      else
+      if(i == 0) {
+         ureg_MOV(shader, addr[0], l_addr[0]);
+         ureg_MOV(shader, addr[1], l_addr[1]);
+      } else
          increment_addr(shader, addr, false, false, idct->buffer_height);
 
-      fetch_four(shader, l[i], addr, ureg_DECL_sampler(shader, 1));
+      fetch_four(shader, l[i], saddr, ureg_DECL_sampler(shader, 1));
    }
-   ureg_release_temporary(shader, addr[0]);
-   ureg_release_temporary(shader, addr[1]);
    
    for (i = 0; i < NR_RENDER_TARGETS; ++i) {
-      if (i == 0)
-         calc_addr(shader, addr, block, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
-      else
+      if(i == 0) {
+         ureg_MOV(shader, addr[0], r_addr[0]);
+         ureg_MOV(shader, addr[1], r_addr[1]);
+      } else
          increment_addr(shader, addr, true, true, BLOCK_HEIGHT);
 
-      fetch_four(shader, r, addr, ureg_DECL_sampler(shader, 0));
+      fetch_four(shader, r, saddr, ureg_DECL_sampler(shader, 0));
 
       for (j = 0; j < 4; ++j) {
          matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
@@ -302,13 +269,53 @@ create_matrix_frag_shader(struct vl_idct *idct)
       ureg_release_temporary(shader, r[0]);
       ureg_release_temporary(shader, r[1]);
    }
-   ureg_release_temporary(shader, addr[0]);
-   ureg_release_temporary(shader, addr[1]);
 
    for (i = 0; i < 4; ++i) {
       ureg_release_temporary(shader, l[i][0]);
       ureg_release_temporary(shader, l[i][1]);
    }
+   ureg_release_temporary(shader, addr[0]);
+   ureg_release_temporary(shader, addr[1]);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void *
+create_transpose_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+
+   struct ureg_src l_addr[2], r_addr[2];
+
+   struct ureg_dst l[2], r[2];
+   struct ureg_dst tmp, fragment;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0));
+   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1));
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   tmp = ureg_DECL_temporary(shader);
+   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), l, r);
+   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
+
+   ureg_release_temporary(shader, tmp);
+   ureg_release_temporary(shader, l[0]);
+   ureg_release_temporary(shader, l[1]);
+   ureg_release_temporary(shader, r[0]);
+   ureg_release_temporary(shader, r[1]);
 
    ureg_END(shader);
 
@@ -318,21 +325,25 @@ create_matrix_frag_shader(struct vl_idct *idct)
 static bool
 init_shaders(struct vl_idct *idct)
 {
-   idct->vs = create_vert_shader(idct);
+   idct->matrix_vs = create_vert_shader(idct, true);
    idct->matrix_fs = create_matrix_frag_shader(idct);
+
+   idct->transpose_vs = create_vert_shader(idct, false);
    idct->transpose_fs = create_transpose_frag_shader(idct);
 
    return 
-      idct->vs != NULL &&
-      idct->transpose_fs != NULL &&
-      idct->matrix_fs != NULL;
+      idct->matrix_vs != NULL &&
+      idct->matrix_fs != NULL &&
+      idct->transpose_vs != NULL &&
+      idct->transpose_fs != NULL;
 }
 
 static void
 cleanup_shaders(struct vl_idct *idct)
 {
-   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
+   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
+   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
 }
 
@@ -353,9 +364,9 @@ init_state(struct vl_idct *idct)
 
    for (i = 0; i < 4; ++i) {
       memset(&sampler, 0, sizeof(sampler));
-      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
       sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
       sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
       sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
@@ -730,13 +741,13 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer)
       idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
       idct->pipe->set_vertex_buffers(idct->pipe, 2, buffer->vertex_bufs.all);
       idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
 
       /* first stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]);
       idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]);
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
+      idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
 
@@ -745,6 +756,7 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer)
       idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]);
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
+      idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
       util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
    }
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 6076bdec46c..fcba75a7607 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -53,7 +53,7 @@ struct vl_idct
       } individual;
    } samplers;
 
-   void *vs;
+   void *matrix_vs, *transpose_vs;
    void *matrix_fs, *transpose_fs;
 
    struct pipe_resource *matrix;
-- 
cgit v1.2.3


From e13fecbbd69ab7e119fcf1dafcb4fb517de5c20d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 13 Dec 2010 00:04:58 +0100
Subject: [g3dvl] add ureg_fixup_label to IF ELSE ENDIF

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 973a746d528..b0ac57ab7a3 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -206,6 +206,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
          ureg_scalar(vrect, TGSI_SWIZZLE_Y),
          ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
 
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
 
    ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ),
@@ -319,6 +320,7 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
             ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc[2], sampler[i]);
          }
 
+      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
       ureg_ENDIF(shader);
    }
 
@@ -366,10 +368,13 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
           */
          ureg_IF(shader, bkwd_pred, &label);
             ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
+         ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
          ureg_ELSE(shader, &label);
             ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]);
+         ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
          ureg_ENDIF(shader);
 
+      ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader));
       ureg_ELSE(shader, &bi_label);
 
          /*
@@ -388,7 +393,9 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
          ureg_LRP(shader, result, ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X),
             ureg_src(ref[0]), ureg_src(ref[1]));
 
+      ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader));
       ureg_ENDIF(shader);
+   ureg_fixup_label(shader, intra_label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
 
    for (i = 0; i < 2; ++i)
-- 
cgit v1.2.3


From d92e97d8848dd96fd8e5b93e62edbf7b5f509efc Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 14 Dec 2010 00:43:53 +0100
Subject: r600g: optimize away CF_INST_POP

If the last instruction is a CF_INST_ALU, we don't need to emit an
additional CF_INST_POP for stack cleanup after an IF ELSE ENDIF.
---
 src/gallium/drivers/r600/r600_asm.c    |  8 ++++++++
 src/gallium/drivers/r600/r600_shader.c | 22 +++++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 1f41269534a..4476f432b98 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -507,6 +507,8 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
 	}
 	/* same on EG */
 	if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
+	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)) &&
+	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)) &&
 	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
 		LIST_IS_EMPTY(&bc->cf_last->alu)) {
 		R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
@@ -728,6 +730,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 
 	switch (cf->inst) {
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
 			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
@@ -808,6 +812,8 @@ int r600_bc_build(struct r600_bc *bc)
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		switch (cf->inst) {
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
 			break;
 		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
@@ -854,6 +860,8 @@ int r600_bc_build(struct r600_bc *bc)
 			return r;
 		switch (cf->inst) {
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
 				switch(bc->chiprev) {
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 6cf23535515..aaebd16a310 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -2676,9 +2676,25 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 
 static int pops(struct r600_shader_ctx *ctx, int pops)
 {
-	r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
-	ctx->bc->cf_last->pop_count = pops;
-	ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+	int alu_pop = 3;
+	if (ctx->bc->cf_last) {
+		if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
+			alu_pop = 0;
+		else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
+			alu_pop = 1;
+	}
+	alu_pop += pops;
+	if (alu_pop == 1) {
+		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
+		ctx->bc->force_add_cf = 1;
+	} else if (alu_pop == 2) {
+		ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
+		ctx->bc->force_add_cf = 1;
+	} else {
+		r600_bc_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_POP));
+		ctx->bc->cf_last->pop_count = pops;
+		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 9d967fc91e471206d01a54ec097f9e0903ac07ae Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 14 Dec 2010 19:32:08 +0100
Subject: r600g: DP4 also supports writemasking

---
 src/gallium/drivers/r600/r600_shader.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index aaebd16a310..3deabbca3a9 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1649,15 +1649,13 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
 			alu.src[j] = r600_src[j];
 			alu.src[j].chan = tgsi_chan(&inst->Src[j], i);
 		}
-		if(inst->Dst[0].Register.WriteMask & (1 << i)) {
-			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
-			if (r)
-				return r;
-		} else {
-			alu.dst.sel = ctx->temp_reg;
-		}
+
+		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		if (r)
+			return r;
+
 		alu.dst.chan = i;
-		alu.dst.write = 1;
+		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
 		/* handle some special cases */
 		switch (ctx->inst_info->tgsi_opcode) {
 		case TGSI_OPCODE_DP2:
-- 
cgit v1.2.3


From d98d2e7c6c34039032e6a0fb8888c450660c763f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 14 Dec 2010 20:20:36 +0100
Subject: [g3dvl] no need to swizzle a scalar

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index b0ac57ab7a3..eec6a65ee79 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -390,7 +390,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
             tc[3], tc[2]);
          ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
 
-         ureg_LRP(shader, result, ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X),
+         ureg_LRP(shader, result, ureg_imm1f(shader, 0.5f),
             ureg_src(ref[0]), ureg_src(ref[1]));
 
       ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader));
-- 
cgit v1.2.3


From 270f6d194c124294d7ca8256c6bf7b6010e8510c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 14 Dec 2010 20:49:31 +0100
Subject: r600g: optimize temp register handling for LRP

---
 src/gallium/drivers/r600/r600_shader.c | 72 ++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 3deabbca3a9..e00c844a17b 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -864,19 +864,25 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
 	return 0;
 }
 
-static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
+static int tgsi_last_instruction(unsigned writemask)
 {
-	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu_src r600_src[3];
-	struct r600_bc_alu alu;
-	int i, j, r;
-	int lasti = 0;
+	int i, lasti = 0;
 
 	for (i = 0; i < 4; i++) {
-		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+		if (writemask & (1 << i)) {
 			lasti = i;
 		}
 	}
+	return lasti;
+}
+
+static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bc_alu_src r600_src[3];
+	struct r600_bc_alu alu;
+	int i, j, r;
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
 	r = tgsi_split_constant(ctx, r600_src);
 	if (r)
@@ -1037,7 +1043,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
 	struct r600_bc_alu_src r600_src[3];
 	struct r600_bc_alu alu;
 	int i, r;
-	int lasti = 0;
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
 	r = tgsi_setup_trig(ctx, r600_src);
 	if (r)
@@ -1057,10 +1063,6 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
 		return r;
 
 	/* replicate result */
-	for (i = 0; i < 4; i++) {
-		if (inst->Dst[0].Register.WriteMask & (1 << i))
-			lasti = i;
-	}
 	for (i = 0; i < lasti + 1; i++) {
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
 			continue;
@@ -1587,13 +1589,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
 	struct r600_bc_alu_src r600_src[3];
 	struct r600_bc_alu alu;
 	int i, j, r;
-	int lasti = 0;
-
-	for (i = 0; i < 4; i++) {
-		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
-			lasti = i;
-		}
-	}
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
 	r = tgsi_split_constant(ctx, r600_src);
 	if (r)
@@ -1937,6 +1933,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	struct r600_bc_alu_src r600_src[3];
 	struct r600_bc_alu alu;
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 	unsigned i;
 	int r;
 
@@ -1947,7 +1944,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 	if (r)
 		return r;
 	/* 1 - src0 */
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < lasti + 1; i++) {
+		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+			continue;
+
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
 		alu.src[0].sel = V_SQ_ALU_SRC_1;
@@ -1957,7 +1957,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		alu.src[1].neg = 1;
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = i;
-		if (i == 3) {
+		if (i == lasti) {
 			alu.last = 1;
 		}
 		alu.dst.write = 1;
@@ -1970,7 +1970,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		return r;
 
 	/* (1 - src0) * src2 */
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < lasti + 1; i++) {
+		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+			continue;
+
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
 		alu.src[0].sel = ctx->temp_reg;
@@ -1979,7 +1982,7 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = i;
-		if (i == 3) {
+		if (i == lasti) {
 			alu.last = 1;
 		}
 		alu.dst.write = 1;
@@ -1992,7 +1995,10 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		return r;
 
 	/* src0 * src1 + (1 - src0) * src2 */
-	for (i = 0; i < 4; i++) {
+	for (i = 0; i < lasti + 1; i++) {
+		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+			continue;
+
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 		alu.is_op3 = 1;
@@ -2002,16 +2008,20 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		alu.src[1].chan = tgsi_chan(&inst->Src[1], i);
 		alu.src[2].sel = ctx->temp_reg;
 		alu.src[2].chan = i;
-		alu.dst.sel = ctx->temp_reg;
+
+		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+		if (r)
+			return r;
+
 		alu.dst.chan = i;
-		if (i == 3) {
+		if (i == lasti) {
 			alu.last = 1;
 		}
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
 	}
-	return tgsi_helper_copy(ctx, inst);
+	return 0;
 }
 
 static int tgsi_cmp(struct r600_shader_ctx *ctx)
@@ -2020,13 +2030,7 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 	struct r600_bc_alu_src r600_src[3];
 	struct r600_bc_alu alu;
 	int i, r;
-	int lasti = 0;
-
-	for (i = 0; i < 4; i++) {
-		if (inst->Dst[0].Register.WriteMask & (1 << i)) {
-			lasti = i;
-		}
-	}
+	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
 
 	r = tgsi_split_constant(ctx, r600_src);
 	if (r)
-- 
cgit v1.2.3


From f75578b31e3cc660c654d13e4f5c4aa8bfe7c6a5 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 14 Dec 2010 23:38:30 +0100
Subject: r600g: use special constants for 0, 1, -1, 1.0f, 0.5f etc

---
 src/gallium/drivers/r600/r600_shader.c | 56 ++++++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index e00c844a17b..9de08847263 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -542,7 +542,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 
 	ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
 
-	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
+	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
 	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
 			ctx.info.file_count[TGSI_FILE_TEMPORARY];
 
@@ -730,22 +730,54 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
 			const struct tgsi_full_src_register *tgsi_src,
 			struct r600_bc_alu_src *r600_src)
 {
-	int index;
 	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
-	r600_src->sel = tgsi_src->Register.Index;
-	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
-		r600_src->sel = 0;
+	r600_src->neg = tgsi_src->Register.Negate;
+	r600_src->abs = tgsi_src->Register.Absolute;
+	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {		
+		int index;
+		if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
+			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
+			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
+
+			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
+			switch(ctx->literals[index]) {
+			case 0:
+				r600_src->sel = V_SQ_ALU_SRC_0;
+				return 0;
+			case 1:
+				r600_src->sel = V_SQ_ALU_SRC_1_INT;
+				return 0;
+			case -1:
+				r600_src->sel = V_SQ_ALU_SRC_M_1_INT;
+				return 0;
+			case 0x3F800000: // 1.0f
+				r600_src->sel = V_SQ_ALU_SRC_1;
+				return 0;
+			case 0x3F000000: // 0.5f
+				r600_src->sel = V_SQ_ALU_SRC_0_5;
+				return 0;
+			case 0xBF800000: // -1.0f
+				r600_src->sel = V_SQ_ALU_SRC_1;
+				r600_src->neg ^= 1;
+				return 0;
+			case 0xBF000000: // -0.5f
+				r600_src->sel = V_SQ_ALU_SRC_0_5;
+				r600_src->neg ^= 1;
+				return 0;
+			}
+		}
 		index = tgsi_src->Register.Index;
+		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
 		ctx->value[0] = ctx->literals[index * 4 + 0];
 		ctx->value[1] = ctx->literals[index * 4 + 1];
 		ctx->value[2] = ctx->literals[index * 4 + 2];
 		ctx->value[3] = ctx->literals[index * 4 + 3];
+	} else {
+		if (tgsi_src->Register.Indirect)
+			r600_src->rel = V_SQ_REL_RELATIVE;
+		r600_src->sel = tgsi_src->Register.Index;
+		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
 	}
-	if (tgsi_src->Register.Indirect)
-		r600_src->rel = V_SQ_REL_RELATIVE;
-	r600_src->neg = tgsi_src->Register.Negate;
-	r600_src->abs = tgsi_src->Register.Absolute;
-	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
 	return 0;
 }
 
@@ -833,12 +865,12 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
 	int i, j, k, nliteral, r;
 
 	for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) {
-		if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
+		if (r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
 			nliteral++;
 		}
 	}
 	for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) {
-		if (j > 0 && inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
+		if (j > 0 && r600_src[i].sel == V_SQ_ALU_SRC_LITERAL) {
 			int treg = r600_get_temp(ctx);
 			for (k = 0; k < 4; k++) {
 				memset(&alu, 0, sizeof(struct r600_bc_alu));
-- 
cgit v1.2.3


From 08c8cd26b86b1d5514ff152ce853eae0541b02fd Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 16 Dec 2010 16:42:14 +0100
Subject: r600g: implement output modifiers and use them to further optimize
 LRP

---
 src/gallium/drivers/r600/r600_asm.c    |  1 +
 src/gallium/drivers/r600/r600_asm.h    |  1 +
 src/gallium/drivers/r600/r600_shader.c | 29 +++++++++++++++++++++++++++++
 src/gallium/drivers/r600/r700_asm.c    |  1 +
 4 files changed, 32 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 4476f432b98..2a62b06cc60 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -707,6 +707,7 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
 					S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
 					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
 					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+					S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
 					S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
 					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
 					S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index b147f0f5c88..ac3ed3c8520 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -62,6 +62,7 @@ struct r600_bc_alu {
 	unsigned			bank_swizzle_force;
 	u32				value[4];
 	int				hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+	unsigned			omod;
 };
 
 struct r600_bc_tex {
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 9de08847263..80579b8d871 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1975,6 +1975,35 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 	r = tgsi_split_literal_constant(ctx, r600_src);
 	if (r)
 		return r;
+
+	/* optimize if it's just an equal balance */
+	if(r600_src[0].sel == V_SQ_ALU_SRC_0_5) {
+		for (i = 0; i < lasti + 1; i++) {
+			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+				continue;
+
+			memset(&alu, 0, sizeof(struct r600_bc_alu));
+			alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD);
+			alu.src[0] = r600_src[1];
+			alu.src[0].chan = tgsi_chan(&inst->Src[1], i);
+			alu.src[1] = r600_src[2];
+			alu.src[1].chan = tgsi_chan(&inst->Src[2], i);
+			alu.omod = 3;
+			r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+			if (r)
+				return r;
+
+			alu.dst.chan = i;
+			if (i == lasti) {
+				alu.last = 1;
+			}
+			r = r600_bc_add_alu(ctx->bc, &alu);
+			if (r)
+				return r;
+		}
+		return 0;
+	}
+
 	/* 1 - src0 */
 	for (i = 0; i < lasti + 1; i++) {
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 892dee86baf..3eb6fb50ca7 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -61,6 +61,7 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 					S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
 					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
 					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
+					S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
 					S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
 					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
 			                S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
-- 
cgit v1.2.3


From f780626c35c7c3cac2e9aa7c2ec77ca587d6ab95 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 16 Dec 2010 18:41:13 +0100
Subject: [g3dvl] move scaling completely into matrix and use less temp
 registers

---
 src/gallium/auxiliary/vl/vl_idct.c               | 89 ++++++++++++------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 25 +++----
 2 files changed, 56 insertions(+), 58 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 5a32573dd66..168c2d7d945 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -42,9 +42,6 @@
 
 #define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
-#define STAGE1_SCALE 4.0f
-#define STAGE2_SCALE (SCALE_FACTOR_16_TO_9 / STAGE1_SCALE / STAGE1_SCALE)
-
 #define NR_RENDER_TARGETS 4
 
 enum VS_INPUT
@@ -171,24 +168,27 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
 }
 
 static void
-increment_addr(struct ureg_program *shader, struct ureg_dst addr[2],
-               bool right_side, bool transposed, float size)
+increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
+               struct ureg_src saddr[2], bool right_side, bool transposed,
+               int pos, float size)
 {
+   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
    unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
 
-   /* addr[0..1]++ */
-   ureg_ADD(shader, ureg_writemask(addr[0], wm_tc),
-      ureg_src(addr[0]), ureg_imm1f(shader, 1.0f / size));
-   ureg_ADD(shader, ureg_writemask(addr[1], wm_tc),
-      ureg_src(addr[1]), ureg_imm1f(shader, 1.0f / size));
+   /* 
+    * daddr[0..1].(start) = saddr[0..1].(start) 
+    * daddr[0..1].(tc) = saddr[0..1].(tc) 
+    */
+   
+   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
+   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
+   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
+   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
 }
 
 static void
 fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler)
 {
-   m[0] = ureg_DECL_temporary(shader);
-   m[1] = ureg_DECL_temporary(shader);
-
    ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler);
    ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler);
 }
@@ -218,9 +218,9 @@ create_matrix_frag_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
 
-   struct ureg_src l_addr[2], r_addr[2], saddr[2];
+   struct ureg_src l_addr[2], r_addr[2];
 
-   struct ureg_dst addr[2], l[4][2], r[2];
+   struct ureg_dst l[4][2], r[2];
    struct ureg_dst fragment[NR_RENDER_TARGETS];
 
    unsigned i, j;
@@ -229,12 +229,6 @@ create_matrix_frag_shader(struct vl_idct *idct)
    if (!shader)
       return NULL;
 
-   addr[0] = ureg_DECL_temporary(shader);
-   addr[1] = ureg_DECL_temporary(shader);
-
-   saddr[0] = ureg_src(addr[0]);
-   saddr[1] = ureg_src(addr[1]);
-
    l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
    l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
 
@@ -245,37 +239,44 @@ create_matrix_frag_shader(struct vl_idct *idct)
        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
 
    for (i = 0; i < 4; ++i) {
-      if(i == 0) {
-         ureg_MOV(shader, addr[0], l_addr[0]);
-         ureg_MOV(shader, addr[1], l_addr[1]);
-      } else
-         increment_addr(shader, addr, false, false, idct->buffer_height);
+      l[i][0] = ureg_DECL_temporary(shader);
+      l[i][1] = ureg_DECL_temporary(shader);
+   }
 
-      fetch_four(shader, l[i], saddr, ureg_DECL_sampler(shader, 1));
+   r[0] = ureg_DECL_temporary(shader);
+   r[1] = ureg_DECL_temporary(shader);
+
+   for (i = 1; i < 4; ++i) {
+      increment_addr(shader, l[i], l_addr, false, false, i, idct->buffer_height);
+   }
+
+   for (i = 0; i < 4; ++i) {
+      struct ureg_src s_addr[2];
+      s_addr[0] = i == 0 ? l_addr[0] : ureg_src(l[i][0]);
+      s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]);
+      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1));
    }
    
    for (i = 0; i < NR_RENDER_TARGETS; ++i) {
-      if(i == 0) {
-         ureg_MOV(shader, addr[0], r_addr[0]);
-         ureg_MOV(shader, addr[1], r_addr[1]);
-      } else
-         increment_addr(shader, addr, true, true, BLOCK_HEIGHT);
+      if(i > 0)
+         increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT);
 
-      fetch_four(shader, r, saddr, ureg_DECL_sampler(shader, 0));
+      struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
+      s_addr[0] = i == 0 ? r_addr[0] : ureg_src(r[0]);
+      s_addr[1] = i == 0 ? r_addr[1] : ureg_src(r[1]);
+      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0));
 
       for (j = 0; j < 4; ++j) {
          matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
       }
-      ureg_release_temporary(shader, r[0]);
-      ureg_release_temporary(shader, r[1]);
    }
 
    for (i = 0; i < 4; ++i) {
       ureg_release_temporary(shader, l[i][0]);
       ureg_release_temporary(shader, l[i][1]);
    }
-   ureg_release_temporary(shader, addr[0]);
-   ureg_release_temporary(shader, addr[1]);
+   ureg_release_temporary(shader, r[0]);
+   ureg_release_temporary(shader, r[1]);
 
    ureg_END(shader);
 
@@ -290,7 +291,7 @@ create_transpose_frag_shader(struct vl_idct *idct)
    struct ureg_src l_addr[2], r_addr[2];
 
    struct ureg_dst l[2], r[2];
-   struct ureg_dst tmp, fragment;
+   struct ureg_dst fragment;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -302,16 +303,18 @@ create_transpose_frag_shader(struct vl_idct *idct)
    r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
    r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
 
+   l[0] = ureg_DECL_temporary(shader);
+   l[1] = ureg_DECL_temporary(shader);
+   r[0] = ureg_DECL_temporary(shader);
+   r[1] = ureg_DECL_temporary(shader);
+
    fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0));
    fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1));
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   tmp = ureg_DECL_temporary(shader);
-   matrix_mul(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), l, r);
-   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, STAGE2_SCALE));
+   matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r);
 
-   ureg_release_temporary(shader, tmp);
    ureg_release_temporary(shader, l[0]);
    ureg_release_temporary(shader, l[1]);
    ureg_release_temporary(shader, r[0]);
@@ -542,7 +545,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
          // transpose and scale
-         f[i * pitch + j] = const_matrix[j][i] * STAGE1_SCALE;
+         f[i * pitch + j] = const_matrix[j][i] * sqrtf(SCALE_FACTOR_16_TO_9);
 
    pipe->transfer_unmap(pipe, buf_transfer);
    pipe->transfer_destroy(pipe, buf_transfer);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index eec6a65ee79..25e97efec04 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -273,13 +273,12 @@ static struct ureg_dst
 fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
 {
    struct ureg_src tc[3], sampler[3], eb[2];
-   struct ureg_dst texel, t_tc, t_eb_info, tmp;
+   struct ureg_dst texel, t_tc, t_eb_info;
    unsigned i, label;
 
    texel = ureg_DECL_temporary(shader);
    t_tc = ureg_DECL_temporary(shader);
    t_eb_info = ureg_DECL_temporary(shader);
-   tmp = ureg_DECL_temporary(shader);
 
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
@@ -326,7 +325,6 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 
    ureg_release_temporary(shader, t_tc);
    ureg_release_temporary(shader, t_eb_info);
-   ureg_release_temporary(shader, tmp);
 
    return texel;
 }
@@ -336,7 +334,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 {
    struct ureg_src ref_frames, bkwd_pred;
    struct ureg_src tc[4], sampler[2];
-   struct ureg_dst ref[2], tmp, result;
+   struct ureg_dst ref[2], result;
    unsigned i, intra_label, bi_label, label;
 
    ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT);
@@ -350,14 +348,13 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
       ref[i] = ureg_DECL_temporary(shader);
    }
 
-   tmp = ureg_DECL_temporary(shader);
    result = ureg_DECL_temporary(shader);
 
    ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
 
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, 0.0f));
-   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), &intra_label);
-      ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
+   ureg_SGE(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, 0.0f));
+   ureg_IF(shader, ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X), &intra_label);
+      ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
                ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
                tc[1], tc[0]);
 
@@ -367,10 +364,10 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
           * result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 1 : 0])
           */
          ureg_IF(shader, bkwd_pred, &label);
-            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
+            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[1]);
          ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
          ureg_ELSE(shader, &label);
-            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]);
+            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
          ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
          ureg_ENDIF(shader);
 
@@ -383,12 +380,11 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
           * else
           *    ref[0..1] = tex(tc[2..3], sampler[0..1])
           */
-         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[0]);
-
-         ureg_CMP(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY),
+         ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
             ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
             tc[3], tc[2]);
-         ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(tmp), sampler[1]);
+         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
+         ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
 
          ureg_LRP(shader, result, ureg_imm1f(shader, 0.5f),
             ureg_src(ref[0]), ureg_src(ref[1]));
@@ -400,7 +396,6 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 
    for (i = 0; i < 2; ++i)
       ureg_release_temporary(shader, ref[i]);
-   ureg_release_temporary(shader, tmp);
 
    return result;
 }
-- 
cgit v1.2.3


From f2fe3735183a4fac193d54999afa911cc891c1f3 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 16 Dec 2010 22:05:33 +0100
Subject: r600g: improve r600_bc_dump

---
 src/gallium/drivers/r600/r600_asm.c | 160 +++++++++++++++++++++++++++++-------
 1 file changed, 132 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 2a62b06cc60..4b84fa6050a 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -962,7 +962,12 @@ void r600_bc_clear(struct r600_bc *bc)
 
 void r600_bc_dump(struct r600_bc *bc)
 {
-	unsigned i;
+	struct r600_bc_cf *cf;
+	struct r600_bc_alu *alu;
+	struct r600_bc_vtx *vtx;
+	struct r600_bc_tex *tex;
+
+	unsigned i, id;
 	char chip = '6';
 
 	switch (bc->chiprev) {
@@ -979,9 +984,133 @@ void r600_bc_dump(struct r600_bc *bc)
 	}
 	fprintf(stderr, "bytecode %d dw -----------------------\n", bc->ndw);
 	fprintf(stderr, "     %c\n", chip);
-	for (i = 0; i < bc->ndw; i++) {
-		fprintf(stderr, "0x%08X\n", bc->bytecode[i]);
+
+	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+		id = cf->id;
+
+		switch (cf->inst) {
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+			fprintf(stderr, "ADDR:%d ", cf->addr);
+			fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache0_mode);
+			fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache0_bank);
+			fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache1_bank);
+			id++;
+			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+			fprintf(stderr, "INST:%d ", cf->inst);
+			fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache1_mode);
+			fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache0_addr);
+			fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache1_addr);
+			fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
+			break;
+		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+			fprintf(stderr, "ADDR:%d\n", cf->addr);
+			id++;
+			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+			fprintf(stderr, "INST:%d ", cf->inst);
+			fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
+			break;
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+			fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+			fprintf(stderr, "GPR:%X ", cf->output.gpr);
+			fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
+			fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
+			fprintf(stderr, "TYPE:%X\n", cf->output.type);
+			id++;
+			fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+			fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x);
+			fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
+			fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
+			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
+			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
+			fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
+			fprintf(stderr, "INST:%d ", cf->output.inst);
+			fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
+			break;
+		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+		case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+		case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+			fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
+			id++;
+			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+			fprintf(stderr, "INST:%d ", cf->inst);
+			fprintf(stderr, "COND:%X ", cf->cond);
+			fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
+			break;
+		}
+
+		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+			id = cf->addr;
+			fprintf(stderr, "%04d %08X\t", id, bc->bytecode[id]);
+			fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
+			fprintf(stderr, "REL:%d ", alu->src[0].rel);
+			fprintf(stderr, "CHAN:%d ", alu->src[0].chan);
+			fprintf(stderr, "NEG:%d) ", alu->src[0].neg);
+			fprintf(stderr, "SRC1(SEL:%d ", alu->src[1].sel);
+			fprintf(stderr, "REL:%d ", alu->src[1].rel);
+			fprintf(stderr, "CHAN:%d ", alu->src[1].chan);
+			fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
+			fprintf(stderr, "LAST:%d)\n", alu->last);
+			id++;
+			if (alu->is_op3) {
+				fprintf(stderr, "%04d %08X\t", id, bc->bytecode[id]);
+				fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
+				fprintf(stderr, "CHAN:%d ", alu->dst.chan);
+				fprintf(stderr, "REL:%d ", alu->dst.rel);
+				fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
+				fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel);
+				fprintf(stderr, "REL:%d ", alu->src[2].rel);
+				fprintf(stderr, "CHAN:%d ", alu->src[2].chan);
+				fprintf(stderr, "NEG:%d) ", alu->src[2].neg);
+				fprintf(stderr, "INST:%d ", alu->inst);
+				fprintf(stderr, "BANK_SWIZZLE:%d\n", alu->bank_swizzle);
+			} else {
+				fprintf(stderr, "%04d %08X\t", id, bc->bytecode[id]);
+				fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
+				fprintf(stderr, "CHAN:%d ", alu->dst.chan);
+				fprintf(stderr, "REL:%d ", alu->dst.rel);
+				fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
+				fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs);
+				fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs);
+				fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write);
+				fprintf(stderr, "OMOD:%d ", alu->omod);
+				fprintf(stderr, "INST:%d ", alu->inst);
+				fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle);
+				fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate);
+				fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate);
+			}
+
+			if (alu->last) {
+				for (i = 0; i < alu->nliteral; i++) {
+					float *f = (float*)(bc->bytecode + id);
+					fprintf(stderr, "%04d %08X %f\n", id, bc->bytecode[id], *f);
+				}
+			}
+		}
+
+		LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+			//TODO
+		}
+
+		LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+			//TODO
+		}
 	}
+
 	fprintf(stderr, "--------------------------------------\n");
 }
 
@@ -1193,31 +1322,6 @@ out_unknown:
 	R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
 }
 
-static void r600_bc(unsigned ndw, unsigned chiprev, u32 *bytecode)
-{
-	unsigned i;
-	char chip = '6';
-
-	switch (chiprev) {
-	case 1:
-		chip = '7';
-		break;
-	case 2:
-		chip = 'E';
-		break;
-	case 0:
-	default:
-		chip = '6';
-		break;
-	}
-	fprintf(stderr, "bytecode %d dw -----------------------\n", ndw);
-	fprintf(stderr, "    %c\n", chip);
-	for (i = 0; i < ndw; i++) {
-		fprintf(stderr, "0x%08X\n", bytecode[i]);
-	}
-	fprintf(stderr, "--------------------------------------\n");
-}
-
 int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, struct r600_vertex_element *ve)
 {
 	unsigned ndw, i;
-- 
cgit v1.2.3


From 01e3e7c7c3af694ceeb426b6f3e1950fabaa5703 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 16 Dec 2010 22:23:48 +0100
Subject: r600g: fix tex and vtx joining

---
 src/gallium/drivers/r600/r600_asm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 4b84fa6050a..17045011f45 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -550,7 +550,7 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
 	/* each fetch use 4 dwords */
 	bc->cf_last->ndw += 4;
 	bc->ndw += 4;
-	if ((bc->ndw / 4) > 7)
+	if ((bc->cf_last->ndw / 4) > 7)
 		bc->force_add_cf = 1;
 	return 0;
 }
@@ -579,7 +579,7 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
 	/* each texture fetch use 4 dwords */
 	bc->cf_last->ndw += 4;
 	bc->ndw += 4;
-	if ((bc->ndw / 4) > 7)
+	if ((bc->cf_last->ndw / 4) > 7)
 		bc->force_add_cf = 1;
 	return 0;
 }
-- 
cgit v1.2.3


From 8cff56168dea3556d1cbe78b67d966669d2d65aa Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 17 Dec 2010 22:57:36 +0100
Subject: r600g: fix alu dumping

---
 src/gallium/drivers/r600/r600_asm.c | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 17045011f45..1d951a12de6 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1053,9 +1053,9 @@ void r600_bc_dump(struct r600_bc *bc)
 			break;
 		}
 
+		id = cf->addr;
 		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
-			id = cf->addr;
-			fprintf(stderr, "%04d %08X\t", id, bc->bytecode[id]);
+			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
 			fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
 			fprintf(stderr, "REL:%d ", alu->src[0].rel);
 			fprintf(stderr, "CHAN:%d ", alu->src[0].chan);
@@ -1066,38 +1066,32 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "NEG:%d) ", alu->src[1].neg);
 			fprintf(stderr, "LAST:%d)\n", alu->last);
 			id++;
+			fprintf(stderr, "%04d %08X %c ", id, bc->bytecode[id], alu->last ? '*' : ' ');
+			fprintf(stderr, "INST:%d ", alu->inst);
+			fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
+			fprintf(stderr, "CHAN:%d ", alu->dst.chan);
+			fprintf(stderr, "REL:%d ", alu->dst.rel);
+			fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
+			fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle);
 			if (alu->is_op3) {
-				fprintf(stderr, "%04d %08X\t", id, bc->bytecode[id]);
-				fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
-				fprintf(stderr, "CHAN:%d ", alu->dst.chan);
-				fprintf(stderr, "REL:%d ", alu->dst.rel);
-				fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
 				fprintf(stderr, "SRC2(SEL:%d ", alu->src[2].sel);
 				fprintf(stderr, "REL:%d ", alu->src[2].rel);
 				fprintf(stderr, "CHAN:%d ", alu->src[2].chan);
-				fprintf(stderr, "NEG:%d) ", alu->src[2].neg);
-				fprintf(stderr, "INST:%d ", alu->inst);
-				fprintf(stderr, "BANK_SWIZZLE:%d\n", alu->bank_swizzle);
+				fprintf(stderr, "NEG:%d)\n", alu->src[2].neg);
 			} else {
-				fprintf(stderr, "%04d %08X\t", id, bc->bytecode[id]);
-				fprintf(stderr, "DST(SEL:%d ", alu->dst.sel);
-				fprintf(stderr, "CHAN:%d ", alu->dst.chan);
-				fprintf(stderr, "REL:%d ", alu->dst.rel);
-				fprintf(stderr, "CLAMP:%d) ", alu->dst.clamp);
 				fprintf(stderr, "SRC0_ABS:%d ", alu->src[0].abs);
 				fprintf(stderr, "SRC1_ABS:%d ", alu->src[1].abs);
 				fprintf(stderr, "WRITE_MASK:%d ", alu->dst.write);
 				fprintf(stderr, "OMOD:%d ", alu->omod);
-				fprintf(stderr, "INST:%d ", alu->inst);
-				fprintf(stderr, "BANK_SWIZZLE:%d ", alu->bank_swizzle);
 				fprintf(stderr, "EXECUTE_MASK:%d ", alu->predicate);
 				fprintf(stderr, "UPDATE_PRED:%d\n", alu->predicate);
 			}
 
+			id++;
 			if (alu->last) {
-				for (i = 0; i < alu->nliteral; i++) {
+				for (i = 0; i < alu->nliteral; i++, id++) {
 					float *f = (float*)(bc->bytecode + id);
-					fprintf(stderr, "%04d %08X %f\n", id, bc->bytecode[id], *f);
+					fprintf(stderr, "%04d %08X   %f\n", id, bc->bytecode[id], *f);
 				}
 			}
 		}
-- 
cgit v1.2.3


From 120a558624b46578412c945eb4b6005be020445c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 18 Dec 2010 00:45:59 +0100
Subject: r600g: optimize away CF ALU instructions even if type doesn't match

---
 src/gallium/drivers/r600/r600_asm.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 1d951a12de6..e792168a679 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -421,16 +421,29 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 	memcpy(nalu, alu, sizeof(struct r600_bc_alu));
 	nalu->nliteral = 0;
 
+	if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
+		/* check if we could add it anyway */
+		if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) &&
+			type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) {
+			LIST_FOR_EACH_ENTRY(alu, &bc->cf_last->alu, list) {
+				if (alu->predicate) {
+					bc->force_add_cf = 1;
+					break;
+				}
+			}
+		} else
+			bc->force_add_cf = 1;
+	}
+
 	/* cf can contains only alu or only vtx or only tex */
-	if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
-		bc->force_add_cf) {
+	if (bc->cf_last == NULL || bc->force_add_cf) {
 		r = r600_bc_add_cf(bc);
 		if (r) {
 			free(nalu);
 			return r;
 		}
-		bc->cf_last->inst = (type << 3);
 	}
+	bc->cf_last->inst = (type << 3);
 	if (!bc->cf_last->curr_bs_head) {
 		bc->cf_last->curr_bs_head = nalu;
 		LIST_INITHEAD(&nalu->bs_list);
-- 
cgit v1.2.3


From 7ff871ee15b61ac2bef7ea91c70e32242581d3f4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 18 Dec 2010 00:47:00 +0100
Subject: [g3dvl] add some missing writemasks

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 25e97efec04..6b78170fb76 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -386,7 +386,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
          ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
          ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
 
-         ureg_LRP(shader, result, ureg_imm1f(shader, 0.5f),
+         ureg_LRP(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f),
             ureg_src(ref[0]), ureg_src(ref[1]));
 
       ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader));
@@ -419,7 +419,7 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    result = fetch_ref(shader, field);
 
-   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(result));
+   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(texel), ureg_src(result));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
-- 
cgit v1.2.3


From f6c47f686d1888d70638f87af56aaa3fd5be8a4d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 18 Dec 2010 13:57:18 +0100
Subject: r600g: fix alu slot assignment

---
 src/gallium/drivers/r600/r600_asm.c | 182 +++++++++++++++++++++++++++++++++---
 1 file changed, 167 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index e792168a679..48625e5537c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -192,6 +192,151 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 	return 0;
 }
 
+/* alu instructions that can only exist once per group */
+static int is_alu_once_inst(struct r600_bc_alu *alu)
+{
+	return !alu->is_op3 && (
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+}
+
+static int is_alu_reduction_inst(struct r600_bc_alu *alu)
+{
+	return !alu->is_op3 && (
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
+}
+
+static int is_alu_mova_inst(struct r600_bc_alu *alu)
+{
+	return !alu->is_op3 && (
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+}
+
+/* alu instructions that can only execute on the vector unit */
+static int is_alu_vec_unit_inst(struct r600_bc_alu *alu)
+{
+	return is_alu_reduction_inst(alu) ||
+		is_alu_mova_inst(alu);
+}
+
+/* alu instructions that can only execute on the trans unit */
+static int is_alu_trans_unit_inst(struct r600_bc_alu *alu)
+{
+	if(!alu->is_op3)
+		return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+	else
+		return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT ||
+			alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 ||
+			alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 ||
+			alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4;
+}
+
+/* alu instructions that can execute on any unit */
+static int is_alu_any_unit_inst(struct r600_bc_alu *alu)
+{
+	return !is_alu_vec_unit_inst(alu) &&
+		!is_alu_trans_unit_inst(alu);
+}
+
+static int assign_alu_units(struct r600_bc_alu *alu_first, struct r600_bc_alu *assignment[5])
+{
+	struct r600_bc_alu *alu;
+	unsigned i, chan, trans;
+
+	for (i = 0; i < 5; i++)
+		assignment[i] = NULL;
+
+	for (alu = alu_first; alu; alu = container_of(alu->list.next, alu, list)) {
+		chan = alu->dst.chan;
+		if (is_alu_trans_unit_inst(alu))
+			trans = 1;
+		else if (is_alu_vec_unit_inst(alu))
+			trans = 0;
+		else if (assignment[chan])
+			trans = 1; // assume ALU_INST_PREFER_VECTOR
+		else
+			trans = 0;
+
+		if (trans) {
+			if (assignment[4]) {
+				assert(0); //ALU.Trans has already been allocated
+				return -1;
+			}
+			assignment[4] = alu;
+		} else {
+			if (assignment[chan]) {
+				assert(0); //ALU.chan has already been allocated
+				return -1;
+			}
+			assignment[chan] = alu;
+		}
+
+		if (alu->last)
+			break;
+	}
+	return 0;
+}
+
 const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210,  //000
 				      SQ_ALU_VEC_120,  //001
 				      SQ_ALU_VEC_102,  //010
@@ -388,25 +533,30 @@ static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu)
 
 static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first)
 {
-	struct r600_bc_alu *alu = NULL;
-	int num_instr = 1;
+	struct r600_bc_alu *assignment[5];
+	int i, r;
 
-	init_gpr(alu_first);
+	r = init_gpr(alu_first);
+	if (r)
+		return r;
 
-	LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
-		num_instr++;
-	}
+	r = assign_alu_units(alu_first, assignment);
+	if (r)
+		return r;
 
-	if (num_instr == 1) {
-		check_scalar(bc, alu_first);
-		
-	} else {
-/*		check_read_slots(bc, bc->cf_last->curr_bs_head);*/
-		check_vector(bc, alu_first);
-		LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
-			check_vector(bc, alu);
+	for (i = 0; i < 4; i++)
+		if (assignment[i]) {
+			r = check_vector(bc, assignment[i]);
+			if (r)
+				return r;
 		}
+
+	if (assignment[4]) {
+		r = check_scalar(bc, assignment[4]);
+		if (r)
+			return r;
 	}
+	
 	return 0;
 }
 
@@ -487,7 +637,9 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 
 	/* process cur ALU instructions for bank swizzle */
 	if (alu->last) {
-		check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
+		r = check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
+		if (r)
+			return r;
 		bc->cf_last->curr_bs_head = NULL;
 	}
 	return 0;
-- 
cgit v1.2.3


From 79f881156f0e1ebb3395a151affb336a05a2cf9c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 18 Dec 2010 17:56:36 +0100
Subject: r600g: rework bank swizzle code

---
 src/gallium/drivers/r600/r600_asm.c | 329 ++++++++++++++++++------------------
 src/gallium/drivers/r600/r600_asm.h |   4 -
 2 files changed, 160 insertions(+), 173 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 48625e5537c..82911e92418 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -32,6 +32,9 @@
 #include "r600_formats.h"
 #include "r600d.h"
 
+#define NUM_OF_CYCLES 3
+#define NUM_OF_COMPONENTS 4
+
 static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
 {
 	if(alu->is_op3)
@@ -337,227 +340,215 @@ static int assign_alu_units(struct r600_bc_alu *alu_first, struct r600_bc_alu *a
 	return 0;
 }
 
-const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210,  //000
-				      SQ_ALU_VEC_120,  //001
-				      SQ_ALU_VEC_102,  //010
-
-				      SQ_ALU_VEC_201,  //011
-				      SQ_ALU_VEC_012,  //100
-				      SQ_ALU_VEC_021,  //101
-
-				      SQ_ALU_VEC_012,  //110
-				      SQ_ALU_VEC_012}; //111
-
-const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210,  //000
-				      SQ_ALU_SCL_122,  //001
-				      SQ_ALU_SCL_122,  //010
-
-				      SQ_ALU_SCL_221,  //011
-				      SQ_ALU_SCL_212,  //100
-				      SQ_ALU_SCL_122,  //101
-
-				      SQ_ALU_SCL_122,  //110
-				      SQ_ALU_SCL_122}; //111
-
-static int init_gpr(struct r600_bc_alu *alu)
+struct alu_bank_swizzle {
+	int	hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
+	int	hw_cfile_addr[4];
+	int	hw_cfile_elem[4];
+};
+
+const unsigned cycle_for_bank_swizzle_vec[][3] = {
+	[SQ_ALU_VEC_012] = { 0, 1, 2 },
+	[SQ_ALU_VEC_021] = { 0, 2, 1 },
+	[SQ_ALU_VEC_120] = { 1, 2, 0 },
+	[SQ_ALU_VEC_102] = { 1, 0, 2 },
+	[SQ_ALU_VEC_201] = { 2, 0, 1 },
+	[SQ_ALU_VEC_210] = { 2, 1, 0 }
+};
+
+const unsigned cycle_for_bank_swizzle_scl[][3] = {
+	[SQ_ALU_SCL_210] = { 2, 1, 0 },
+	[SQ_ALU_SCL_122] = { 1, 2, 2 },
+	[SQ_ALU_SCL_212] = { 2, 1, 2 },
+	[SQ_ALU_SCL_221] = { 2, 2, 1 }
+};
+
+static void init_bank_swizzle(struct alu_bank_swizzle *bs)
 {
-	int cycle, component;
+	int i, cycle, component;
 	/* set up gpr use */
 	for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
 		for (component = 0; component < NUM_OF_COMPONENTS; component++)
-			 alu->hw_gpr[cycle][component] = -1;
-	return 0;
+			 bs->hw_gpr[cycle][component] = -1;
+	for (i = 0; i < 4; i++)
+		bs->hw_cfile_addr[i] = -1;
+	for (i = 0; i < 4; i++)
+		bs->hw_cfile_elem[i] = -1;
 }
 
-#if 0
-static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle)
+static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
 {
-	if (alu->hw_gpr[cycle][chan] < 0)
-		alu->hw_gpr[cycle][chan] = sel;
-	else if (alu->hw_gpr[cycle][chan] != (int)sel) {
-		R600_ERR("Another scalar operation has already used GPR read port for channel\n");
+	if (bs->hw_gpr[cycle][chan] == -1)
+		bs->hw_gpr[cycle][chan] = sel;
+	else if (bs->hw_gpr[cycle][chan] != (int)sel) {
+		// Another scalar operation has already used GPR read port for channel
 		return -1;
 	}
 	return 0;
 }
 
-static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
+static int reserve_cfile(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
 {
-	int table[3];
-	int ret = 0;
-	switch (swiz) {
-	case SQ_ALU_SCL_210:
-		table[0] = 2; table[1] = 1; table[2] = 0;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_SCL_122:
-		table[0] = 1; table[1] = 2; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_SCL_212:
-		table[0] = 2; table[1] = 1; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_SCL_221:
-		table[0] = 2; table[1] = 2; table[2] = 1;
-		*p_cycle = table[sel];
-                break;
-		break;
-	default:
-		R600_ERR("bad scalar bank swizzle value\n");
-		ret = -1;
-		break;
+	int res, resmatch = -1, resempty = -1;
+	for (res = 3; res >= 0; --res) {
+		if (bs->hw_cfile_addr[res] == -1)
+			resempty = res;
+		else if (bs->hw_cfile_addr[res] == sel &&
+			bs->hw_cfile_elem[res] == chan)
+			resmatch = res;
 	}
-	return ret;
-}
-
-static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
-{
-	int table[3];
-	int ret;
-
-	switch (swiz) {
-	case SQ_ALU_VEC_012:
-		table[0] = 0; table[1] = 1; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_VEC_021:
-		table[0] = 0; table[1] = 2; table[2] = 1;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_VEC_120:
-		table[0] = 1; table[1] = 2; table[2] = 0;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_VEC_102:
-		table[0] = 1; table[1] = 0; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_VEC_201:
-		table[0] = 2; table[1] = 0; table[2] = 1;
-                *p_cycle = table[sel];
-                break;
-	case SQ_ALU_VEC_210:
-		table[0] = 2; table[1] = 1; table[2] = 0;
-                *p_cycle = table[sel];
-                break;
-	default:
-		R600_ERR("bad vector bank swizzle value\n");
-		ret = -1;
-		break;
+	if (resmatch != -1)
+		return 0; // Read for this scalar element already reserved, nothing to do here.
+	else if (resempty != -1) {
+		bs->hw_cfile_addr[resempty] = sel;
+		bs->hw_cfile_elem[resempty] = chan;
+	} else {
+		// All cfile read ports are used, cannot reference vector element
+		return -1;
 	}
-	return ret;
+	return 0;	
 }
 
-
-
-static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter)
+static int is_gpr(unsigned sel)
 {
-	int num_src;
-	int i;
-	int channel_swizzle;
-
-	num_src = r600_bc_get_num_operands(alu);
-
-	for (i = 0; i < num_src; i++) {
-		channel_swizzle = alu->src[i].chan;
-		if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3)
-			chan_counter[channel_swizzle]++;
-	}
+	return (sel >= 0 && sel <= 127);
 }
 
-/* we need something like this I think - but this is bogus */
-int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+static int is_cfile(unsigned sel)
 {
-	struct r600_bc_alu *alu;
-	int chan_counter[4]  = { 0 };
-
-	update_chan_counter(alu_first, chan_counter);
-
-	LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
-		update_chan_counter(alu, chan_counter);
-	}
-
-	if (chan_counter[0] > 3 ||
-	    chan_counter[1] > 3 ||
-	    chan_counter[2] > 3 ||
-	    chan_counter[3] > 3) {
-		R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
-			 alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]);
-		return -1;
-	}
-	return 0;
+	return (sel > 255 && sel < 512);
 }
-#endif
 
 static int is_const(int sel)
 {
-	if (sel > 255 && sel < 512)
-		return 1;
-	if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL)
-		return 1;
-	return 0;
+	return is_cfile(sel) ||
+		(sel >= V_SQ_ALU_SRC_0 && 
+		sel <= V_SQ_ALU_SRC_LITERAL);
 }
 
-static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int check_vector(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle)
 {
-	unsigned swizzle_key;
+	int r, src, num_src, sel, elem, cycle;
 
-	if (alu->bank_swizzle_force) {
-		alu->bank_swizzle = alu->bank_swizzle_force;
-		return 0;
+	num_src = r600_bc_get_num_operands(alu);
+	for (src = 0; src < num_src; src++) {
+		sel = alu->src[src].sel;
+		elem = alu->src[src].chan;
+		if (is_gpr(sel)) {
+			cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
+			if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
+				// Nothing to do; special-case optimization, 
+				// second source uses first source’s reservation
+				continue;
+			else {
+				r = reserve_gpr(bs, sel, elem, cycle);
+				if (r)
+					return r;
+			}
+		} else if (is_cfile(sel)) {
+			r = reserve_cfile(bs, sel, elem);
+			if (r)
+				return r;
+		}
+		// No restrictions on PV, PS, literal or special constants
 	}
-	swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + 
-		(is_const(alu->src[1].sel) ? 2 : 0 ) + 
-		(is_const(alu->src[2].sel) ? 1 : 0 );
-
-	alu->bank_swizzle = bank_swizzle_scl[swizzle_key];
 	return 0;
 }
 
-static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu)
+static int check_scalar(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle)
 {
-	unsigned swizzle_key;
+	int r, src, num_src, const_count, sel, elem, cycle;
 
-	if (alu->bank_swizzle_force) {
-		alu->bank_swizzle = alu->bank_swizzle_force;
-		return 0;
+	num_src = r600_bc_get_num_operands(alu);
+	for (const_count = 0, src = 0; src < num_src; ++src) {
+		sel = alu->src[src].sel;
+		elem = alu->src[src].chan;
+		if (is_const(sel)) { // Any constant, including literal and inline constants
+			if (const_count >= 2)
+				// More than two references to a constant in
+				// transcendental operation.
+				return -1; 
+			else
+				const_count++;
+		}
+		if (is_cfile(sel)) {
+			r = reserve_cfile(bs, sel, elem);
+			if (r)
+				return r;
+		}
+	}
+	for (src = 0; src < num_src; ++src) {
+		sel = alu->src[src].sel;
+		elem = alu->src[src].chan;
+		if (is_gpr(sel)) {
+			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
+			if (cycle < const_count)
+				// Cycle for GPR load conflicts with
+				// constant load in transcendental operation.
+				return -1;
+			r = reserve_gpr(bs, sel, elem, cycle);
+			if (r)
+				return r;
+		}
+		// Constants already processed
+		// No restrictions on PV, PS
 	}
-	swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + 
-		(is_const(alu->src[1].sel) ? 2 : 0 ) + 
-		(is_const(alu->src[2].sel) ? 1 : 0 );
-
-	alu->bank_swizzle = bank_swizzle_vec[swizzle_key];
 	return 0;
 }
 
 static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first)
 {
 	struct r600_bc_alu *assignment[5];
+	struct alu_bank_swizzle bs;
+	int bank_swizzle[5];
 	int i, r;
 
-	r = init_gpr(alu_first);
-	if (r)
-		return r;
-
 	r = assign_alu_units(alu_first, assignment);
 	if (r)
 		return r;
 
+	if(alu_first->bank_swizzle_force) {
+		for (i = 0; i < 5; i++)
+			if (assignment[i])
+				assignment[i]->bank_swizzle = assignment[i]->bank_swizzle_force;
+		return 0;
+	}
+
+	// just check every possible combination of bank swizzle
+	// not very efficient, but works on the first try in most cases
 	for (i = 0; i < 4; i++)
-		if (assignment[i]) {
-			r = check_vector(bc, assignment[i]);
-			if (r)
-				return r;
+		bank_swizzle[i] = SQ_ALU_VEC_012;
+	bank_swizzle[4] = SQ_ALU_SCL_210;
+	while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
+		init_bank_swizzle(&bs);
+		for (i = 0; i < 4; i++) {
+			if (assignment[i]) {
+				r = check_vector(assignment[i], &bs, bank_swizzle[i]);
+				if (r)
+					break;
+			}
+		}
+		if (!r && assignment[4]) {
+			r = check_scalar(assignment[4], &bs, bank_swizzle[4]);
+		}
+		if (!r) {
+			for (i = 0; i < 5; i++) {
+				if (assignment[i])
+					assignment[i]->bank_swizzle = bank_swizzle[i];
+			}
+			return 0;
 		}
 
-	if (assignment[4]) {
-		r = check_scalar(bc, assignment[4]);
-		if (r)
-			return r;
+		for (i = 0; i < 5; i++) {
+			bank_swizzle[i]++;
+			if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+				break;
+			else
+				bank_swizzle[i] = SQ_ALU_VEC_012;
+		}
 	}
-	
-	return 0;
+
+	// couldn't find a working swizzle
+	return -1;
 }
 
 int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index ac3ed3c8520..fc60079c3e0 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -25,9 +25,6 @@
 
 #include "util/u_double_list.h"
 
-#define NUM_OF_CYCLES 3
-#define NUM_OF_COMPONENTS 4
-
 struct r600_vertex_element;
 struct r600_pipe_context;
 
@@ -61,7 +58,6 @@ struct r600_bc_alu {
 	unsigned			bank_swizzle;
 	unsigned			bank_swizzle_force;
 	u32				value[4];
-	int				hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
 	unsigned			omod;
 };
 
-- 
cgit v1.2.3


From 7b0cc9bd386f62f58c39e66ce29f1423cfccdfb7 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 18 Dec 2010 21:32:16 +0100
Subject: r600g: implement replacing gpr with pv and ps

---
 src/gallium/drivers/r600/r600_asm.c | 64 ++++++++++++++++++++++++++++++++++---
 src/gallium/drivers/r600/r600_asm.h |  2 +-
 src/gallium/drivers/r600/r600_sq.h  |  2 ++
 3 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 82911e92418..c0501f5018d 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -107,7 +107,6 @@ static struct r600_bc_alu *r600_bc_alu(void)
 	if (alu == NULL)
 		return NULL;
 	LIST_INITHEAD(&alu->list);
-	LIST_INITHEAD(&alu->bs_list);
 	return alu;
 }
 
@@ -551,6 +550,63 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *al
 	return -1;
 }
 
+static int replace_gpr_with_pv_ps(struct r600_bc_alu *alu_first, struct r600_bc_alu *alu_prev)
+{
+	struct r600_bc_alu *slots[5];
+	int gpr[5], chan[5];
+	int i, j, r, src, num_src;
+	
+	r = assign_alu_units(alu_prev, slots);
+	if (r)
+		return r;
+
+	for (i = 0; i < 5; ++i) {
+		if(slots[i] && slots[i]->dst.write && !slots[i]->dst.rel) {
+			gpr[i] = slots[i]->dst.sel;
+			if (is_alu_reduction_inst(slots[i]))
+				chan[i] = 0;
+			else
+				chan[i] = slots[i]->dst.chan;
+		} else
+			gpr[i] = -1;
+		
+	}
+
+	r = assign_alu_units(alu_first, slots);
+	if (r)
+		return r;
+
+	for (i = 0; i < 5; ++i) {
+		struct r600_bc_alu *alu = slots[i];
+		if(!alu)
+			continue;
+
+		num_src = r600_bc_get_num_operands(alu);
+		for (src = 0; src < num_src; ++src) {
+			if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
+				continue;
+
+			if (alu->src[src].sel == gpr[4] &&
+				alu->src[src].chan == chan[4]) {
+				alu->src[src].sel = V_SQ_ALU_SRC_PS;
+				alu->src[src].chan = 0;
+				continue;
+			}
+
+			for (j = 0; j < 4; ++j) {
+				if (alu->src[src].sel == gpr[j] &&
+					alu->src[src].chan == j) {
+					alu->src[src].sel = V_SQ_ALU_SRC_PV;
+					alu->src[src].chan = chan[j];
+					break;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
 int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
 {
 	struct r600_bc_alu *nalu = r600_bc_alu();
@@ -587,9 +643,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 	bc->cf_last->inst = (type << 3);
 	if (!bc->cf_last->curr_bs_head) {
 		bc->cf_last->curr_bs_head = nalu;
-		LIST_INITHEAD(&nalu->bs_list);
-	} else {
-		LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list);
 	}
 	/* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
 	 * worst case */
@@ -628,9 +681,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 
 	/* process cur ALU instructions for bank swizzle */
 	if (alu->last) {
+		if (bc->cf_last->prev_bs_head)
+			replace_gpr_with_pv_ps(bc->cf_last->curr_bs_head, bc->cf_last->prev_bs_head);
 		r = check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
 		if (r)
 			return r;
+		bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
 		bc->cf_last->curr_bs_head = NULL;
 	}
 	return 0;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index fc60079c3e0..94ba902fb5b 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -46,7 +46,6 @@ struct r600_bc_alu_dst {
 
 struct r600_bc_alu {
 	struct list_head		list;
-	struct list_head		bs_list; /* bank swizzle list */
 	struct r600_bc_alu_src		src[3];
 	struct r600_bc_alu_dst		dst;
 	unsigned			inst;
@@ -143,6 +142,7 @@ struct r600_bc_cf {
 	struct list_head		vtx;
 	struct r600_bc_output		output;
 	struct r600_bc_alu		*curr_bs_head;
+	struct r600_bc_alu		*prev_bs_head;
 };
 
 #define FC_NONE				0
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index 0573e63dc82..2401d47e2a2 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -187,6 +187,8 @@
 #define     V_SQ_ALU_SRC_M_1_INT                                     0x000000FB
 #define     V_SQ_ALU_SRC_0_5                                         0x000000FC
 #define     V_SQ_ALU_SRC_LITERAL                                     0x000000FD
+#define     V_SQ_ALU_SRC_PV                                          0x000000FE
+#define     V_SQ_ALU_SRC_PS                                          0x000000FF
 #define     V_SQ_ALU_SRC_PARAM_BASE                                  0x000001C0
 #define   S_SQ_ALU_WORD0_SRC0_REL(x)                                 (((x) & 0x1) << 9)
 #define   G_SQ_ALU_WORD0_SRC0_REL(x)                                 (((x) >> 9) & 0x1)
-- 
cgit v1.2.3


From ac5b174706da61c93cd083a4a913c2caa1d74298 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 20 Dec 2010 22:09:09 +0100
Subject: r600g: merge alu groups

---
 src/gallium/drivers/r600/r600_asm.c | 184 +++++++++++++++++++++++++++++-------
 src/gallium/drivers/r600/r600_asm.h |   1 +
 2 files changed, 149 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index c0501f5018d..9dfd72b53d0 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -308,7 +308,7 @@ static int assign_alu_units(struct r600_bc_alu *alu_first, struct r600_bc_alu *a
 	for (i = 0; i < 5; i++)
 		assignment[i] = NULL;
 
-	for (alu = alu_first; alu; alu = container_of(alu->list.next, alu, list)) {
+	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
 		chan = alu->dst.chan;
 		if (is_alu_trans_unit_inst(alu))
 			trans = 1;
@@ -494,23 +494,20 @@ static int check_scalar(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, in
 	return 0;
 }
 
-static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+static int check_and_set_bank_swizzle(struct r600_bc_alu *slots[5])
 {
-	struct r600_bc_alu *assignment[5];
 	struct alu_bank_swizzle bs;
 	int bank_swizzle[5];
-	int i, r;
+	int i, r = 0, forced = 0;
 
-	r = assign_alu_units(alu_first, assignment);
-	if (r)
-		return r;
-
-	if(alu_first->bank_swizzle_force) {
-		for (i = 0; i < 5; i++)
-			if (assignment[i])
-				assignment[i]->bank_swizzle = assignment[i]->bank_swizzle_force;
+	for (i = 0; i < 5; i++)
+		if (slots[i] && slots[i]->bank_swizzle_force) {
+			slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+			forced = 1;
+		}
+	
+	if (forced)
 		return 0;
-	}
 
 	// just check every possible combination of bank swizzle
 	// not very efficent, but works on the first try in most of the cases
@@ -520,19 +517,19 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *al
 	while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
 		init_bank_swizzle(&bs);
 		for (i = 0; i < 4; i++) {
-			if (assignment[i]) {
-				r = check_vector(assignment[i], &bs, bank_swizzle[i]);
+			if (slots[i]) {
+				r = check_vector(slots[i], &bs, bank_swizzle[i]);
 				if (r)
 					break;
 			}
 		}
-		if (!r && assignment[4]) {
-			r = check_scalar(assignment[4], &bs, bank_swizzle[4]);
+		if (!r && slots[4]) {
+			r = check_scalar(slots[4], &bs, bank_swizzle[4]);
 		}
 		if (!r) {
 			for (i = 0; i < 5; i++) {
-				if (assignment[i])
-					assignment[i]->bank_swizzle = bank_swizzle[i];
+				if (slots[i])
+					slots[i]->bank_swizzle = bank_swizzle[i];
 			}
 			return 0;
 		}
@@ -550,32 +547,27 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *al
 	return -1;
 }
 
-static int replace_gpr_with_pv_ps(struct r600_bc_alu *alu_first, struct r600_bc_alu *alu_prev)
+static int replace_gpr_with_pv_ps(struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
 {
-	struct r600_bc_alu *slots[5];
+	struct r600_bc_alu *prev[5];
 	int gpr[5], chan[5];
 	int i, j, r, src, num_src;
 	
-	r = assign_alu_units(alu_prev, slots);
+	r = assign_alu_units(alu_prev, prev);
 	if (r)
 		return r;
 
 	for (i = 0; i < 5; ++i) {
-		if(slots[i] && slots[i]->dst.write && !slots[i]->dst.rel) {
-			gpr[i] = slots[i]->dst.sel;
-			if (is_alu_reduction_inst(slots[i]))
+		if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
+			gpr[i] = prev[i]->dst.sel;
+			if (is_alu_reduction_inst(prev[i]))
 				chan[i] = 0;
 			else
-				chan[i] = slots[i]->dst.chan;
+				chan[i] = prev[i]->dst.chan;
 		} else
-			gpr[i] = -1;
-		
+			gpr[i] = -1;		
 	}
 
-	r = assign_alu_units(alu_first, slots);
-	if (r)
-		return r;
-
 	for (i = 0; i < 5; ++i) {
 		struct r600_bc_alu *alu = slots[i];
 		if(!alu)
@@ -607,6 +599,109 @@ static int replace_gpr_with_pv_ps(struct r600_bc_alu *alu_first, struct r600_bc_
 	return 0;
 }
 
+static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
+{
+	struct r600_bc_alu *prev[5];
+	struct r600_bc_alu *result[5] = { NULL };
+	int i, j, r, src, num_src;
+	int num_once_inst = 0;
+
+	r = assign_alu_units(alu_prev, prev);
+	if (r)
+		return r;
+
+	for (i = 0; i < 5; ++i) {
+		// TODO: we have literals? forget it!
+		if (prev[i] && prev[i]->nliteral)
+			return 0;
+		if (slots[i] && slots[i]->nliteral)
+			return 0;
+
+
+		// let's check used slots
+		if (prev[i] && !slots[i]) {
+			result[i] = prev[i];
+			num_once_inst += is_alu_once_inst(prev[i]);
+			continue;
+		} else if (prev[i] && slots[i]) {
+			if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
+				// trans unit is still free try to use it
+				if (is_alu_any_unit_inst(slots[i])) {
+					result[i] = prev[i];
+					result[4] = slots[i];
+				} else if (is_alu_any_unit_inst(prev[i])) {
+					result[i] = slots[i];
+					result[4] = prev[i];
+				} else
+					return 0;
+			} else
+				return 0;
+		} else if(!slots[i]) {
+			continue;
+		} else 
+			result[i] = slots[i];
+
+		// let's check source gprs
+		struct r600_bc_alu *alu = slots[i];
+		num_once_inst += is_alu_once_inst(alu);
+
+		num_src = r600_bc_get_num_operands(alu);
+		for (src = 0; src < num_src; ++src) {
+			// constants doesn't matter
+			if (!is_gpr(alu->src[src].sel))
+				continue;
+
+			for (j = 0; j < 5; ++j) {
+				if (!prev[j] || !prev[j]->dst.write)
+					continue;
+
+				// if it's relative then we can't determin which gpr is really used
+				if (prev[j]->dst.chan == alu->src[src].chan &&
+					(prev[j]->dst.sel == alu->src[src].sel ||
+					prev[j]->dst.rel || alu->src[src].rel))
+					return 0;
+			}
+		}
+	}
+
+	/* more than one PRED_ or KILL_ ? */
+	if (num_once_inst > 1)
+		return 0;
+
+	/* check if the result can still be swizzlet */
+	r = check_and_set_bank_swizzle(result);
+	if (r)
+		return 0;
+
+	/* looks like everything worked out right, apply the changes */
+
+	/* sort instructions */
+	for (i = 0; i < 5; ++i) {
+		slots[i] = result[i];
+		if (result[i]) {
+			LIST_DEL(&result[i]->list);
+			result[i]->last = 0;
+			LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
+		}
+	}
+
+	/* determine new last instruction */
+	LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;
+
+	/* determine new first instruction */
+	for (i = 0; i < 5; ++i) {
+		if (result[i]) {
+			bc->cf_last->curr_bs_head = result[i];
+			break;
+		}
+	}
+
+	bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
+	bc->cf_last->prev2_bs_head = NULL;
+
+	return 0;
+}
+
 int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
 {
 	struct r600_bc_alu *nalu = r600_bc_alu();
@@ -644,7 +739,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 	if (!bc->cf_last->curr_bs_head) {
 		bc->cf_last->curr_bs_head = nalu;
 	}
-	/* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
+	/* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
 	 * worst case */
 	if (alu->last && (bc->cf_last->ndw >> 1) >= 120) {
 		bc->force_add_cf = 1;
@@ -681,11 +776,28 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 
 	/* process cur ALU instructions for bank swizzle */
 	if (alu->last) {
-		if (bc->cf_last->prev_bs_head)
-			replace_gpr_with_pv_ps(bc->cf_last->curr_bs_head, bc->cf_last->prev_bs_head);
-		r = check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
+		struct r600_bc_alu *slots[5];
+		r = assign_alu_units(bc->cf_last->curr_bs_head, slots);
 		if (r)
 			return r;
+
+		if (bc->cf_last->prev_bs_head) {
+			r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head);
+			if (r)
+				return r;
+		}
+
+		if (bc->cf_last->prev_bs_head) {
+			r = replace_gpr_with_pv_ps(slots, bc->cf_last->prev_bs_head);
+			if (r)
+				return r;
+		}
+
+		r = check_and_set_bank_swizzle(slots);
+		if (r)
+			return r;
+
+		bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
 		bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
 		bc->cf_last->curr_bs_head = NULL;
 	}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 94ba902fb5b..013df54b32e 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -143,6 +143,7 @@ struct r600_bc_cf {
 	struct r600_bc_output		output;
 	struct r600_bc_alu		*curr_bs_head;
 	struct r600_bc_alu		*prev_bs_head;
+	struct r600_bc_alu		*prev2_bs_head;
 };
 
 #define FC_NONE				0
-- 
cgit v1.2.3


From adf89a33296b60c746e813c3def030207cac9ec1 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 21 Dec 2010 21:27:57 +0100
Subject: r600g: fix bug created by 120a558624b46578412c945eb4b6005be020445c

---
 src/gallium/drivers/r600/r600_asm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 9dfd72b53d0..febf191a6c1 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -717,8 +717,8 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 		/* check if we could add it anyway */
 		if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) &&
 			type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) {
-			LIST_FOR_EACH_ENTRY(alu, &bc->cf_last->alu, list) {
-				if (alu->predicate) {
+			LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
+				if (lalu->predicate) {
 					bc->force_add_cf = 1;
 					break;
 				}
-- 
cgit v1.2.3


From 26127d6a2f2df8d8833825bbe96b28ed4fc028f0 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 22 Dec 2010 17:45:51 +0100
Subject: r600g: rework literal handling

---
 src/gallium/drivers/r600/r600_asm.c    | 189 ++++++++++++++++++------------
 src/gallium/drivers/r600/r600_asm.h    |   6 +-
 src/gallium/drivers/r600/r600_shader.c | 203 +++++----------------------------
 src/gallium/drivers/r600/r600_shader.h |   2 +-
 src/gallium/drivers/r600/r700_asm.c    |  10 --
 5 files changed, 148 insertions(+), 262 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index febf191a6c1..1cd0f4cdcfd 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -599,10 +599,90 @@ static int replace_gpr_with_pv_ps(struct r600_bc_alu *slots[5], struct r600_bc_a
 	return 0;
 }
 
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
+{
+	switch(value) {
+	case 0:
+		*sel = V_SQ_ALU_SRC_0;
+		break;
+	case 1:
+		*sel = V_SQ_ALU_SRC_1_INT;
+		break;
+	case -1:
+		*sel = V_SQ_ALU_SRC_M_1_INT;
+		break;
+	case 0x3F800000: // 1.0f
+		*sel = V_SQ_ALU_SRC_1;
+		break;
+	case 0x3F000000: // 0.5f
+		*sel = V_SQ_ALU_SRC_0_5;
+		break;
+	case 0xBF800000: // -1.0f
+		*sel = V_SQ_ALU_SRC_1;
+		*neg ^= 1;
+		break;
+	case 0xBF000000: // -0.5f
+		*sel = V_SQ_ALU_SRC_0_5;
+		*neg ^= 1;
+		break;
+	default:
+		*sel = V_SQ_ALU_SRC_LITERAL;
+		break;
+	}
+}
+
+/* compute how many literal are needed */
+static int r600_bc_alu_nliterals(struct r600_bc_alu *alu, uint32_t literal[4], unsigned *nliteral)
+{
+	unsigned num_src = r600_bc_get_num_operands(alu);
+	unsigned i, j;
+
+	for (i = 0; i < num_src; ++i) {
+		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+			uint32_t value = alu->src[i].value[alu->src[i].chan];
+			unsigned found = 0;
+			for (j = 0; j < *nliteral; ++j) {
+				if (literal[j] == value) {
+					found = 1;
+					break;
+				}
+			}
+			if (!found) {
+				if (*nliteral >= 4)
+					return -EINVAL;
+				literal[(*nliteral)++] = value;
+			}
+		}
+	}
+	return 0;
+}
+
+static void r600_bc_alu_adjust_literals(struct r600_bc_alu *alu, uint32_t literal[4], unsigned nliteral)
+{
+	unsigned num_src = r600_bc_get_num_operands(alu);
+	unsigned i, j;
+
+	for (i = 0; i < num_src; ++i) {
+		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
+			uint32_t value = alu->src[i].value[alu->src[i].chan];
+			for (j = 0; j < nliteral; ++j) {
+				if (literal[j] == value) {
+					alu->src[i].chan = j;
+					break;
+				}
+			}
+		}
+	}
+}
+
 static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
 {
 	struct r600_bc_alu *prev[5];
 	struct r600_bc_alu *result[5] = { NULL };
+	
+	uint32_t literal[4];
+	unsigned nliteral = 0;
+
 	int i, j, r, src, num_src;
 	int num_once_inst = 0;
 
@@ -611,13 +691,12 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], s
 		return r;
 
 	for (i = 0; i < 5; ++i) {
-		// TODO: we have literals? forget it!
-		if (prev[i] && prev[i]->nliteral)
+		/* check number of literals */
+		if (prev[i] && r600_bc_alu_nliterals(prev[i], literal, &nliteral))
 			return 0;
-		if (slots[i] && slots[i]->nliteral)
+		if (slots[i] && r600_bc_alu_nliterals(slots[i], literal, &nliteral))
 			return 0;
 
-
 		// let's check used slots
 		if (prev[i] && !slots[i]) {
 			result[i] = prev[i];
@@ -711,7 +790,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 	if (nalu == NULL)
 		return -ENOMEM;
 	memcpy(nalu, alu, sizeof(struct r600_bc_alu));
-	nalu->nliteral = 0;
 
 	if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
 		/* check if we could add it anyway */
@@ -749,20 +827,10 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 		if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
 			bc->ngpr = alu->src[i].sel + 1;
 		}
-		/* compute how many literal are needed
-		 * either 2 or 4 literals
-		 */
-		if (alu->src[i].sel == 253) {
-			if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
-				nalu->nliteral = (alu->src[i].chan + 2) & 0x6;
-			}
-		}
-	}
-	if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
-		lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
-		if (!lalu->last && lalu->nliteral > nalu->nliteral) {
-			nalu->nliteral = lalu->nliteral;
-		}
+		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
+			r600_bc_special_constants(
+				nalu->src[i].value[nalu->src[i].chan], 
+				&nalu->src[i].sel, &nalu->src[i].neg);
 	}
 	if (alu->dst.sel >= bc->ngpr) {
 		bc->ngpr = alu->dst.sel + 1;
@@ -809,46 +877,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
 	return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
 }
 
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
-{
-	struct r600_bc_alu *alu;
-
-	if (bc->cf_last == NULL) {
-		return 0;
-	}
-	if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
-		return 0;
-	}
-	/* all same on EG */
-	if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
-	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
-		return 0;
-	}
-	/* same on EG */
-	if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
-	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)) &&
-	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)) &&
-	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
-		LIST_IS_EMPTY(&bc->cf_last->alu)) {
-		R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
-		return -EINVAL;
-	}
-	alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
-	if (!alu->last || !alu->nliteral || alu->literal_added) {
-		return 0;
-	}
-	memcpy(alu->value, value, 4 * 4);
-	bc->cf_last->ndw += alu->nliteral;
-	bc->ndw += alu->nliteral;
-	alu->literal_added = 1;
-	return 0;
-}
-
 int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
 {
 	struct r600_bc_vtx *nvtx = r600_bc_vtx();
@@ -999,8 +1027,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
 /* r600 only, r700/eg bits in r700_asm.c */
 static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 {
-	unsigned i;
-
 	/* don't replace gpr by pv or ps for destination register */
 	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
 				S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
@@ -1037,14 +1063,6 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
 					S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
 					S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
 	}
-	if (alu->last) {
-		if (alu->nliteral && !alu->literal_added) {
-			R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
-		}
-		for (i = 0; i < alu->nliteral; i++) {
-			bc->bytecode[id++] = alu->value[i];
-		}
-	}
 	return 0;
 }
 
@@ -1122,8 +1140,10 @@ int r600_bc_build(struct r600_bc *bc)
 	struct r600_bc_alu *alu;
 	struct r600_bc_vtx *vtx;
 	struct r600_bc_tex *tex;
+	uint32_t literal[4];
+	unsigned nliteral;
 	unsigned addr;
-	int r;
+	int i, r;
 
 	if (bc->callstack[0].max > 0)
 		bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
@@ -1140,6 +1160,16 @@ int r600_bc_build(struct r600_bc *bc)
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+			nliteral = 0;
+			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+				r = r600_bc_alu_nliterals(alu, literal, &nliteral);
+				if (r)
+					return r;
+				if (alu->last) {
+					cf->ndw += align(nliteral, 2);
+					nliteral = 0;
+				}
+			}
 			break;
 		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
 		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
@@ -1188,7 +1218,12 @@ int r600_bc_build(struct r600_bc *bc)
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+			nliteral = 0;
 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+				r = r600_bc_alu_nliterals(alu, literal, &nliteral);
+				if (r)
+					return r;
+				r600_bc_alu_adjust_literals(alu, literal, nliteral);
 				switch(bc->chiprev) {
 				case CHIPREV_R600:
 					r = r600_bc_alu_build(bc, alu, addr);
@@ -1205,7 +1240,10 @@ int r600_bc_build(struct r600_bc *bc)
 					return r;
 				addr += 2;
 				if (alu->last) {
-					addr += alu->nliteral;
+					for (i = 0; i < align(nliteral, 2); ++i) {
+						bc->bytecode[addr++] = literal[i];
+					}
+					nliteral = 0;
 				}
 			}
 			break;
@@ -1292,6 +1330,8 @@ void r600_bc_dump(struct r600_bc *bc)
 	struct r600_bc_tex *tex;
 
 	unsigned i, id;
+	uint32_t literal[4];
+	unsigned nliteral;
 	char chip = '6';
 
 	switch (bc->chiprev) {
@@ -1378,7 +1418,10 @@ void r600_bc_dump(struct r600_bc *bc)
 		}
 
 		id = cf->addr;
+		nliteral = 0;
 		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+			r600_bc_alu_nliterals(alu, literal, &nliteral);
+
 			fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
 			fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
 			fprintf(stderr, "REL:%d ", alu->src[0].rel);
@@ -1413,10 +1456,12 @@ void r600_bc_dump(struct r600_bc *bc)
 
 			id++;
 			if (alu->last) {
-				for (i = 0; i < alu->nliteral; i++, id++) {
+				for (i = 0; i < nliteral; i++, id++) {
 					float *f = (float*)(bc->bytecode + id);
 					fprintf(stderr, "%04d %08X   %f\n", id, bc->bytecode[id], *f);
 				}
+				id += nliteral & 1;
+				nliteral = 0;
 			}
 		}
 
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 013df54b32e..259b264e4d9 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -34,6 +34,7 @@ struct r600_bc_alu_src {
 	unsigned			neg;
 	unsigned			abs;
 	unsigned			rel;
+	u32				*value;
 };
 
 struct r600_bc_alu_dst {
@@ -52,11 +53,8 @@ struct r600_bc_alu {
 	unsigned			last;
 	unsigned			is_op3;
 	unsigned			predicate;
-	unsigned			nliteral;
-	unsigned			literal_added;
 	unsigned			bank_swizzle;
 	unsigned			bank_swizzle_force;
-	u32				value[4];
 	unsigned			omod;
 };
 
@@ -195,13 +193,13 @@ void eg_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
 int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
 void r600_bc_clear(struct r600_bc *bc);
 int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu);
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value);
 int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
 int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
 int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
 int r600_bc_build(struct r600_bc *bc);
 int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
 int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg);
 void r600_bc_dump(struct r600_bc *bc);
 void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
 void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned count);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 80579b8d871..f2e74c9cee2 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -225,21 +225,23 @@ int r600_pipe_shader(struct pipe_context *ctx, struct r600_pipe_shader *shader)
 	return 0;
 }
 
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
 int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *shader, const struct tgsi_token *tokens)
 {
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
+	u32 *literals;
 	int r;
 
 //fprintf(stderr, "--------------------------------------------------------------\n");
 //tgsi_dump(tokens, 0);
 	shader->shader.family = r600_get_family(rctx->radeon);
-	r = r600_shader_from_tgsi(tokens, &shader->shader);
+	r = r600_shader_from_tgsi(tokens, &shader->shader, &literals);
 	if (r) {
 		R600_ERR("translation from TGSI failed !\n");
 		return r;
 	}
 	r = r600_bc_build(&shader->shader.bc);
+	free(literals);
 	if (r) {
 		R600_ERR("building bytecode failed !\n");
 		return r;
@@ -272,7 +274,6 @@ struct r600_shader_ctx {
 	struct r600_shader_tgsi_instruction	*inst_info;
 	struct r600_bc				*bc;
 	struct r600_shader			*shader;
-	u32					value[4];
 	u32					*literals;
 	u32					nliterals;
 	u32					max_driver_temp_used;
@@ -481,7 +482,7 @@ static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
 	return ctx->num_interp_gpr;
 }
 
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals)
 {
 	struct tgsi_full_immediate *immediate;
 	struct r600_shader_ctx ctx;
@@ -583,9 +584,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 			else
 				ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
 			r = ctx.inst_info->process(&ctx);
-			if (r)
-				goto out_err;
-			r = r600_bc_add_literal(ctx.bc, ctx.value);
 			if (r)
 				goto out_err;
 			break;
@@ -706,7 +704,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 		if (r)
 			goto out_err;
 	}
-	free(ctx.literals);
+	*literals = ctx.literals;
 	tgsi_parse_free(&ctx.parse);
 	return 0;
 out_err:
@@ -740,38 +738,13 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
 			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {
 
 			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
-			switch(ctx->literals[index]) {
-			case 0:
-				r600_src->sel = V_SQ_ALU_SRC_0;
-				return 0;
-			case 1:
-				r600_src->sel = V_SQ_ALU_SRC_1_INT;
-				return 0;
-			case -1:
-				r600_src->sel = V_SQ_ALU_SRC_M_1_INT;
-				return 0;
-			case 0x3F800000: // 1.0f
-				r600_src->sel = V_SQ_ALU_SRC_1;
+			r600_bc_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg);
+			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
 				return 0;
-			case 0x3F000000: // 0.5f
-				r600_src->sel = V_SQ_ALU_SRC_0_5;
-				return 0;
-			case 0xBF800000: // -1.0f
-				r600_src->sel = V_SQ_ALU_SRC_1;
-				r600_src->neg ^= 1;
-				return 0;
-			case 0xBF000000: // -0.5f
-				r600_src->sel = V_SQ_ALU_SRC_0_5;
-				r600_src->neg ^= 1;
-				return 0;
-			}
 		}
 		index = tgsi_src->Register.Index;
 		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
-		ctx->value[0] = ctx->literals[index * 4 + 0];
-		ctx->value[1] = ctx->literals[index * 4 + 1];
-		ctx->value[2] = ctx->literals[index * 4 + 2];
-		ctx->value[3] = ctx->literals[index * 4 + 3];
+		r600_src->value = ctx->literals + index * 4;
 	} else {
 		if (tgsi_src->Register.Indirect)
 			r600_src->rel = V_SQ_REL_RELATIVE;
@@ -877,6 +850,7 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
 				alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
 				alu.src[0].sel = r600_src[i].sel;
 				alu.src[0].chan = k;
+				alu.src[0].value = r600_src[i].value;
 				alu.dst.sel = treg;
 				alu.dst.chan = k;
 				alu.dst.write = 1;
@@ -886,9 +860,6 @@ static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_
 				if (r)
 					return r;
 			}
-			r = r600_bc_add_literal(ctx->bc, &ctx->literals[inst->Src[i].Register.Index * 4]);
-			if (r)
-				return r;
 			r600_src[i].sel = treg;
 			j--;
 		}
@@ -983,12 +954,14 @@ static int tgsi_op2_swap(struct r600_shader_ctx *ctx)
 static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
 			   struct r600_bc_alu_src r600_src[3])
 {
+	static float half_inv_pi = 1.0 /(3.1415926535 * 2);
+	static float double_pi = 3.1415926535 * 2;
+	static float neg_pi = -3.1415926535;
+
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	int r;
-	uint32_t lit_vals[4];
 	struct r600_bc_alu alu;
 
-	memset(lit_vals, 0, 4*4);
 	r = tgsi_split_constant(ctx, r600_src);
 	if (r)
 		return r;
@@ -996,9 +969,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
 	if (r)
 		return r;
 
-	lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
-	lit_vals[1] = fui(0.5f);
-
 	memset(&alu, 0, sizeof(struct r600_bc_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 	alu.is_op3 = 1;
@@ -1012,13 +982,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
 
 	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
 	alu.src[1].chan = 0;
-	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+	alu.src[1].value = (uint32_t *)&half_inv_pi;
+	alu.src[2].sel = V_SQ_ALU_SRC_0_5;
 	alu.src[2].chan = 1;
 	alu.last = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
-	if (r)
-		return r;
-	r = r600_bc_add_literal(ctx->bc, lit_vals);
 	if (r)
 		return r;
 
@@ -1036,14 +1004,6 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
 	if (r)
 		return r;
 
-	if (ctx->bc->chiprev == CHIPREV_R600) {
-		lit_vals[0] = fui(3.1415926535897f * 2.0f);
-		lit_vals[1] = fui(-3.1415926535897f);
-	} else {
-		lit_vals[0] = fui(1.0f);
-		lit_vals[1] = fui(-0.5f);
-	}
-
 	memset(&alu, 0, sizeof(struct r600_bc_alu));
 	alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD);
 	alu.is_op3 = 1;
@@ -1059,11 +1019,18 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
 	alu.src[1].chan = 0;
 	alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
 	alu.src[2].chan = 1;
+
+	if (ctx->bc->chiprev == CHIPREV_R600) {
+		alu.src[1].value = (uint32_t *)&double_pi;
+		alu.src[2].value = (uint32_t *)&neg_pi;
+	} else {
+		alu.src[1].sel = V_SQ_ALU_SRC_1;
+		alu.src[2].sel = V_SQ_ALU_SRC_0_5;
+		alu.src[2].neg = 1;
+	}
+
 	alu.last = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
-	if (r)
-		return r;
-	r = r600_bc_add_literal(ctx->bc, lit_vals);
 	if (r)
 		return r;
 	return 0;
@@ -1181,10 +1148,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* dst.w = 1.0; */
@@ -1205,10 +1168,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	return 0;
@@ -1244,9 +1203,6 @@ static int tgsi_kill(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 	}
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 
 	/* kill must be last in ALU */
 	ctx->bc->force_add_cf = 1;
@@ -1309,10 +1265,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 	if (r)
 		return r;
 
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
-
 	if (inst->Dst[0].Register.WriteMask & (1 << 2))
 	{
 		int chan;
@@ -1331,10 +1283,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		chan = alu.dst.chan;
 		sel = alu.dst.sel;
 
@@ -1357,9 +1305,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 		/* dst.z = exp(tmp.x) */
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -1401,9 +1346,6 @@ static int tgsi_rsq(struct r600_shader_ctx *ctx)
 	alu.dst.write = 1;
 	alu.last = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
-	if (r)
-		return r;
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
 	if (r)
 		return r;
 	/* replicate result */
@@ -1452,9 +1394,6 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
 	alu.dst.write = 1;
 	alu.last = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
-	if (r)
-		return r;
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
 	if (r)
 		return r;
 	/* replicate result */
@@ -1478,9 +1417,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
 	alu.dst.write = 1;
 	alu.last = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
-	if (r)
-		return r;
-	r = r600_bc_add_literal(ctx->bc,ctx->value);
 	if (r)
 		return r;
 	/* b * LOG2(a) */
@@ -1495,9 +1431,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
 	alu.dst.write = 1;
 	alu.last = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
-	if (r)
-		return r;
-	r = r600_bc_add_literal(ctx->bc,ctx->value);
 	if (r)
 		return r;
 	/* POW(a,b) = EXP2(b * LOG2(a))*/
@@ -1508,9 +1441,6 @@ static int tgsi_pow(struct r600_shader_ctx *ctx)
 	alu.dst.write = 1;
 	alu.last = 1;
 	r = r600_bc_add_alu(ctx->bc, &alu);
-	if (r)
-		return r;
-	r = r600_bc_add_literal(ctx->bc,ctx->value);
 	if (r)
 		return r;
 	return tgsi_helper_tempx_replicate(ctx);
@@ -1552,9 +1482,6 @@ static int tgsi_ssg(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 	}
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 
 	/* dst = (-tmp > 0 ? -1 : tmp) */
 	for (i = 0; i < 4; i++) {
@@ -1589,9 +1516,6 @@ static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instru
 	struct r600_bc_alu alu;
 	int i, r;
 
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 	for (i = 0; i < 4; i++) {
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
@@ -1720,6 +1644,7 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
 
 static int tgsi_tex(struct r600_shader_ctx *ctx)
 {
+	static float one_point_five = 1.5f;
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	struct r600_bc_tex tex;
 	struct r600_bc_alu alu;
@@ -1729,7 +1654,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 	boolean src_not_temp =
 		inst->Src[0].Register.File != TGSI_FILE_TEMPORARY &&
 		inst->Src[0].Register.File != TGSI_FILE_INPUT;
-	uint32_t lit_vals[4];
 
 	src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
 
@@ -1878,6 +1802,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 
 		alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
 		alu.src[2].chan = 0;
+		alu.src[2].value = (u32*)&one_point_five;
 
 		alu.dst.sel = ctx->temp_reg;
 		alu.dst.chan = 1;
@@ -1888,11 +1813,6 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		lit_vals[0] = fui(1.5f);
-
-		r = r600_bc_add_literal(ctx->bc, lit_vals);
-		if (r)
-			return r;
 		src_not_temp = FALSE;
 		src_gpr = ctx->temp_reg;
 	}
@@ -2026,9 +1946,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 	}
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 
 	/* (1 - src0) * src2 */
 	for (i = 0; i < lasti + 1; i++) {
@@ -2051,9 +1968,6 @@ static int tgsi_lrp(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 	}
-	r = r600_bc_add_literal(ctx->bc, ctx->value);
-	if (r)
-		return r;
 
 	/* src0 * src1 + (1 - src0) * src2 */
 	for (i = 0; i < lasti + 1; i++) {
@@ -2194,10 +2108,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	for (i = 0; i < 4; i++) {
@@ -2255,10 +2165,6 @@ static int tgsi_xpd(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 	if (use_temp)
 		return tgsi_helper_copy(ctx, inst);
@@ -2291,10 +2197,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
 		alu.src[0].sel = ctx->temp_reg;
 		alu.src[0].chan = 0;
@@ -2306,10 +2208,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.y = tmp - floor(tmp); */
@@ -2335,9 +2233,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.z = RoughApprox2ToX(tmp);*/
@@ -2358,9 +2253,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.w = 1.0;*/
@@ -2378,9 +2270,6 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 	return tgsi_helper_copy(ctx, inst);
 }
@@ -2410,10 +2299,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
 		alu.src[0].sel = ctx->temp_reg;
 		alu.src[0].chan = 0;
@@ -2426,10 +2311,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.y = src.x / (2 ^ floor(log2(src.x))); */
@@ -2452,10 +2333,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR);
@@ -2471,10 +2348,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE);
@@ -2490,10 +2363,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
@@ -2509,10 +2378,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		if (r)
 			return r;
 
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
-
 		memset(&alu, 0, sizeof(struct r600_bc_alu));
 
 		alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL);
@@ -2534,10 +2399,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.z = log2(src);*/
@@ -2559,10 +2420,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	/* result.w = 1.0; */
@@ -2581,10 +2438,6 @@ static int tgsi_log(struct r600_shader_ctx *ctx)
 		r = r600_bc_add_alu(ctx->bc, &alu);
 		if (r)
 			return r;
-
-		r = r600_bc_add_literal(ctx->bc, ctx->value);
-		if (r)
-			return r;
 	}
 
 	return tgsi_helper_copy(ctx, inst);
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 35b0331525a..935dd6fe3ab 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -47,6 +47,6 @@ struct r600_shader {
 	boolean			uses_kill;
 };
 
-int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader);
+int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader, u32 **literals);
 
 #endif
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 3eb6fb50ca7..a7f2f54736e 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -29,8 +29,6 @@
 
 int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 {
-	unsigned i;
-
 	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
 		S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
 		S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
@@ -67,13 +65,5 @@ int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 			                S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
 		 	                S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
 	}
-	if (alu->last) {
-		if (alu->nliteral && !alu->literal_added) {
-			R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
-		}
-		for (i = 0; i < alu->nliteral; i++) {
-			bc->bytecode[id++] = alu->value[i];
-		}
-	}
 	return 0;
 }
-- 
cgit v1.2.3


From f853ea007816cdad4395b42388e12cd65bb8eb43 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 22 Dec 2010 20:01:39 +0100
Subject: [g3dvl] move code around for more optimal shader generation

---
 src/gallium/auxiliary/vl/vl_idct.c               |   6 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 118 +++++++++++------------
 2 files changed, 61 insertions(+), 63 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 168c2d7d945..b84b447ce6b 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -142,13 +142,13 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
 
    ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
    ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
-
    ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
       ureg_scalar(vrect, TGSI_SWIZZLE_X),
       ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));
 
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+
    ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
 
    if(matrix_stage) {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 6b78170fb76..b195d7e2c39 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -87,8 +87,7 @@ enum VS_OUTPUT
    VS_O_TEX2,
    VS_O_EB_0,
    VS_O_EB_1,
-   VS_O_REF_FRAMES,
-   VS_O_BKWD_PRED,
+   VS_O_INFO,
    VS_O_MV0,
    VS_O_MV1,
    VS_O_MV2,
@@ -99,12 +98,11 @@ static void *
 create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src scale;
+   struct ureg_src block_scale, mv_scale;
    struct ureg_src vrect, vpos, eb[2][2], vmv[4];
    struct ureg_src interlaced, frame_pred, ref_frames, bkwd_pred;
    struct ureg_dst t_vpos, t_vtex, t_vmv;
-   struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4];
-   struct ureg_dst o_ref_frames, o_bkwd_pred;
+   struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4], o_info;
    unsigned i, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -133,8 +131,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);   
    o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0);
    o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1);
-   o_ref_frames = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES);
-   o_bkwd_pred = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED);
+   o_info = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO);
    
    for (i = 0; i < 4; ++i) {
      vmv[i] = ureg_DECL_vs_input(shader, VS_I_MV0 + i);
@@ -142,12 +139,23 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    }
 
    /*
-    * scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
+    * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
+    * mv_scale = 0.5 / (dst.width, dst.height);
     *
-    * t_vpos = (vpos + vrect) * scale
+    * t_vpos = (vpos + vrect) * block_scale
     * o_vpos.xy = t_vpos
     * o_vpos.zw = vpos
     *
+    * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
+    *
+    * o_frame_pred = frame_pred
+    * o_info.x = ref_frames
+    * o_info.y = ref_frames >= 0
+    * o_info.z = bkwd_pred
+    *
+    * // Apply motion vectors
+    * o_vmv[0..count] = t_vpos + vmv[0..count] * mv_scale
+    *
     * o_line.xy = vrect * 8
     * o_line.z = interlaced
     *
@@ -156,35 +164,54 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
     *    t_vtex.y = vrect.y * 0.5
     *    t_vtex += vpos
     *
-    *    o_vtex[0].xy = t_vtex * scale
+    *    o_vtex[0].xy = t_vtex * block_scale
     *
     *    t_vtex.y += 0.5
-    *    o_vtex[1].xy = t_vtex * scale
+    *    o_vtex[1].xy = t_vtex * block_scale
     * } else {
     *    o_vtex[0..1].xy = t_vpos
     * }
     * o_vtex[2].xy = t_vpos
     *
-    * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
-    *
-    * o_frame_pred = frame_pred
-    * o_ref_frames = ref_frames
-    * o_bkwd_pred = bkwd_pred
-    *
-    * // Apply motion vectors
-    * scale = 0.5 / (dst.width, dst.height);
-    * o_vmv[0..count] = t_vpos + vmv[0..count] * scale
-    *
     */
-   scale = ureg_imm2f(shader,
+   block_scale = ureg_imm2f(shader,
       (float)MACROBLOCK_WIDTH / r->buffer_width,
       (float)MACROBLOCK_HEIGHT / r->buffer_height);
 
+   mv_scale = ureg_imm2f(shader,
+      0.5f / r->buffer_width,
+      0.5f / r->buffer_height);
+
    ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
-   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), scale);
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
+   ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
+            eb[0][1], eb[0][0]);
+   ureg_CMP(shader, ureg_writemask(o_eb[1], TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
+            eb[1][1], eb[1][0]);
+
+   ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_X), ref_frames);
+   ureg_SGE(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Y),
+      ureg_scalar(ref_frames, TGSI_SWIZZLE_X), ureg_imm1f(shader, 0.0f));
+   ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Z), ureg_scalar(bkwd_pred, TGSI_SWIZZLE_X));
+
+   ureg_MAD(shader, ureg_writemask(o_vmv[0], TGSI_WRITEMASK_XY), mv_scale, vmv[0], ureg_src(t_vpos));
+   ureg_MAD(shader, ureg_writemask(o_vmv[2], TGSI_WRITEMASK_XY), mv_scale, vmv[2], ureg_src(t_vpos));
+
+   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
+            vmv[0], vmv[1]);
+   ureg_MAD(shader, ureg_writemask(o_vmv[1], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos));
+
+   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
+            vmv[2], vmv[3]);
+   ureg_MAD(shader, ureg_writemask(o_vmv[3], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos));
+
    ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
@@ -198,9 +225,9 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
       ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
       ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
-      ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), scale);
+      ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
       ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
-      ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), scale);
+      ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
 
       ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X),
          ureg_scalar(vrect, TGSI_SWIZZLE_Y),
@@ -209,33 +236,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
 
-   ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
-            eb[0][1], eb[0][0]);
-   ureg_CMP(shader, ureg_writemask(o_eb[1], TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
-            eb[1][1], eb[1][0]);
-
-   ureg_MOV(shader, ureg_writemask(o_ref_frames, TGSI_WRITEMASK_X), ref_frames);
-   ureg_MOV(shader, ureg_writemask(o_bkwd_pred, TGSI_WRITEMASK_X), bkwd_pred);
-
-   scale = ureg_imm2f(shader,
-      0.5f / r->buffer_width,
-      0.5f / r->buffer_height);
-
-   ureg_MAD(shader, ureg_writemask(o_vmv[0], TGSI_WRITEMASK_XY), scale, vmv[0], ureg_src(t_vpos));
-   ureg_MAD(shader, ureg_writemask(o_vmv[2], TGSI_WRITEMASK_XY), scale, vmv[2], ureg_src(t_vpos));
-
-   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
-            vmv[0], vmv[1]);
-   ureg_MAD(shader, ureg_writemask(o_vmv[1], TGSI_WRITEMASK_XY), scale, ureg_src(t_vmv), ureg_src(t_vpos));
-
-   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
-            vmv[2], vmv[3]);
-   ureg_MAD(shader, ureg_writemask(o_vmv[3], TGSI_WRITEMASK_XY), scale, ureg_src(t_vmv), ureg_src(t_vpos));
-
    ureg_release_temporary(shader, t_vtex);
    ureg_release_temporary(shader, t_vpos);
    ureg_release_temporary(shader, t_vmv);
@@ -332,13 +332,12 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 static struct ureg_dst
 fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src ref_frames, bkwd_pred;
+   struct ureg_src info;
    struct ureg_src tc[4], sampler[2];
    struct ureg_dst ref[2], result;
    unsigned i, intra_label, bi_label, label;
 
-   ref_frames = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_REF_FRAMES, TGSI_INTERPOLATE_CONSTANT);
-   bkwd_pred = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BKWD_PRED, TGSI_INTERPOLATE_CONSTANT);
+   info = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO, TGSI_INTERPOLATE_CONSTANT);
 
    for (i = 0; i < 4; ++i)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
@@ -350,20 +349,19 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 
    result = ureg_DECL_temporary(shader);
 
-   ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
+   ureg_MOV(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f));
 
-   ureg_SGE(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_X), ref_frames, ureg_imm1f(shader, 0.0f));
-   ureg_IF(shader, ureg_scalar(ureg_src(ref[0]), TGSI_SWIZZLE_X), &intra_label);
+   ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Y), &intra_label);
       ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
                ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
                tc[1], tc[0]);
 
-      ureg_IF(shader, ureg_scalar(ref_frames, TGSI_SWIZZLE_X), &bi_label);
+      ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &bi_label);
 
          /*
           * result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 1 : 0])
           */
-         ureg_IF(shader, bkwd_pred, &label);
+         ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Z), &label);
             ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[1]);
          ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
          ureg_ELSE(shader, &label);
-- 
cgit v1.2.3


From 1b03996b1269fb8b2f89e0a013d8427da660ff83 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 22 Dec 2010 20:50:16 +0100
Subject: r600g: remove some unneeded barriers

---
 src/gallium/drivers/r600/r600_asm.c    | 16 ++++++++--------
 src/gallium/drivers/r600/r600_shader.c |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 1cd0f4cdcfd..491086eea87 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1485,21 +1485,21 @@ void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
 	if (count > 8) {
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-						S_SQ_CF_WORD1_BARRIER(1) |
+						S_SQ_CF_WORD1_BARRIER(0) |
 						S_SQ_CF_WORD1_COUNT(8 - 1);
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-						S_SQ_CF_WORD1_BARRIER(1) |
+						S_SQ_CF_WORD1_BARRIER(0) |
 						S_SQ_CF_WORD1_COUNT(count - 8 - 1);
 	} else {
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-						S_SQ_CF_WORD1_BARRIER(1) |
+						S_SQ_CF_WORD1_BARRIER(0) |
 						S_SQ_CF_WORD1_COUNT(count - 1);
 	}
 	bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
 	bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
-			S_SQ_CF_WORD1_BARRIER(1);
+			S_SQ_CF_WORD1_BARRIER(0);
 
 	rstate = &ve->rstate;
 	rstate->id = R600_PIPE_STATE_FETCH_SHADER;
@@ -1521,21 +1521,21 @@ void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned coun
 	if (count > 8) {
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
-						S_SQ_CF_WORD1_BARRIER(1) |
+						S_SQ_CF_WORD1_BARRIER(0) |
 						S_SQ_CF_WORD1_COUNT(8 - 1);
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
-						S_SQ_CF_WORD1_BARRIER(1) |
+						S_SQ_CF_WORD1_BARRIER(0) |
 						S_SQ_CF_WORD1_COUNT((count - 8) - 1);
 	} else {
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
-						S_SQ_CF_WORD1_BARRIER(1) |
+						S_SQ_CF_WORD1_BARRIER(0) |
 						S_SQ_CF_WORD1_COUNT(count - 1);
 	}
 	bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
 	bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
-			S_SQ_CF_WORD1_BARRIER(1);
+			S_SQ_CF_WORD1_BARRIER(0);
 
 	rstate = &ve->rstate;
 	rstate->id = R600_PIPE_STATE_FETCH_SHADER;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index f2e74c9cee2..80972b04ab0 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -603,7 +603,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 		output[i].swizzle_y = 1;
 		output[i].swizzle_z = 2;
 		output[i].swizzle_w = 3;
-		output[i].barrier = 1;
+		output[i].barrier = i == 0;
 		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
 		output[i].array_base = i - pos0;
 		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
-- 
cgit v1.2.3


From 2191d8064e9d177311f0833241d16384ead89e1a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 22 Dec 2010 21:38:48 +0100
Subject: [g3dvl] fix merge conflicts

---
 src/gallium/auxiliary/vl/vl_compositor.c     | 5 ++---
 src/gallium/auxiliary/vl/vl_vertex_buffers.c | 4 ++--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 1a05f369d9b..5187c635e4b 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -516,7 +516,7 @@ static unsigned gen_data(struct vl_compositor *c,
       }
    }
 
-   pipe_buffer_unmap(c->pipe, c->vertex_buf.buffer, buf_transfer);
+   pipe_buffer_unmap(c->pipe, buf_transfer);
 
    return num_rects;
 }
@@ -633,6 +633,5 @@ void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float
       sizeof(struct fragment_shader_consts)
    );
 
-   pipe_buffer_unmap(compositor->pipe, compositor->fs_const_buf,
-                     buf_transfer);
+   pipe_buffer_unmap(compositor->pipe, buf_transfer);
 }
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 4182bad784b..3a69730c9da 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -77,7 +77,7 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
    for ( i = 0; i < max_blocks; ++i)
      memcpy(v + i, &const_quad, sizeof(const_quad));
 
-   pipe_buffer_unmap(pipe, quad.buffer, buf_transfer);
+   pipe_buffer_unmap(pipe, buf_transfer);
 
    return quad;
 }
@@ -161,7 +161,7 @@ vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 {
    assert(buffer && pipe);
 
-   pipe_buffer_unmap(pipe, buffer->resource, buffer->transfer);
+   pipe_buffer_unmap(pipe, buffer->transfer);
 }
 
 unsigned
-- 
cgit v1.2.3


From 22de93b435f868daa9f80e88ad2d128bd4cc67c4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 27 Dec 2010 17:58:00 +0100
Subject: r600g: implement register remapping

---
 src/gallium/drivers/r600/r600_asm.c | 617 ++++++++++++++++++++++++++++++------
 1 file changed, 525 insertions(+), 92 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 491086eea87..23c847cb436 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -48,10 +48,10 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
-	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: 
+	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
-	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: 
+	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
@@ -64,7 +64,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
 		return 2;
 
-	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: 
+	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
@@ -79,7 +79,7 @@ static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
 	case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
 		return 1;
 	default: R600_ERR(
-		"Need instruction operand number for 0x%x.\n", alu->inst); 
+		"Need instruction operand number for 0x%x.\n", alu->inst);
 	};
 
 	return 3;
@@ -404,7 +404,7 @@ static int reserve_cfile(struct alu_bank_swizzle *bs, unsigned sel, unsigned cha
 		// All cfile read ports are used, cannot reference vector element
 		return -1;
 	}
-	return 0;	
+	return 0;
 }
 
 static int is_gpr(unsigned sel)
@@ -420,7 +420,7 @@ static int is_cfile(unsigned sel)
 static int is_const(int sel)
 {
 	return is_cfile(sel) ||
-		(sel >= V_SQ_ALU_SRC_0 && 
+		(sel >= V_SQ_ALU_SRC_0 &&
 		sel <= V_SQ_ALU_SRC_LITERAL);
 }
 
@@ -435,7 +435,7 @@ static int check_vector(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, in
 		if (is_gpr(sel)) {
 			cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
 			if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
-				// Nothing to do; special-case optimization, 
+				// Nothing to do; special-case optimization,
 				// second source uses first source’s reservation
 				continue;
 			else {
@@ -465,7 +465,7 @@ static int check_scalar(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, in
 			if (const_count >= 2)
 				// More than two references to a constant in
 				// transcendental operation.
-				return -1; 
+				return -1;
 			else
 				const_count++;
 		}
@@ -505,7 +505,7 @@ static int check_and_set_bank_swizzle(struct r600_bc_alu *slots[5])
 			slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
 			forced = 1;
 		}
-	
+
 	if (forced)
 		return 0;
 
@@ -552,7 +552,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc_alu *slots[5], struct r600_bc_a
 	struct r600_bc_alu *prev[5];
 	int gpr[5], chan[5];
 	int i, j, r, src, num_src;
-	
+
 	r = assign_alu_units(alu_prev, prev);
 	if (r)
 		return r;
@@ -565,7 +565,7 @@ static int replace_gpr_with_pv_ps(struct r600_bc_alu *slots[5], struct r600_bc_a
 			else
 				chan[i] = prev[i]->dst.chan;
 		} else
-			gpr[i] = -1;		
+			gpr[i] = -1;
 	}
 
 	for (i = 0; i < 5; ++i) {
@@ -679,7 +679,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], s
 {
 	struct r600_bc_alu *prev[5];
 	struct r600_bc_alu *result[5] = { NULL };
-	
+
 	uint32_t literal[4];
 	unsigned nliteral = 0;
 
@@ -717,7 +717,7 @@ static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5], s
 				return 0;
 		} else if(!slots[i]) {
 			continue;
-		} else 
+		} else
 			result[i] = slots[i];
 
 		// let's check source gprs
@@ -829,7 +829,7 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 		}
 		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
 			r600_bc_special_constants(
-				nalu->src[i].value[nalu->src[i].chan], 
+				nalu->src[i].value[nalu->src[i].chan],
 				&nalu->src[i].sel, &nalu->src[i].neg);
 	}
 	if (alu->dst.sel >= bc->ngpr) {
@@ -1066,16 +1066,59 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
 	return 0;
 }
 
-/* common for r600/r700 - eg in eg_asm.c */
-static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
+enum cf_class
 {
-	unsigned id = cf->id;
+	CF_CLASS_ALU,
+	CF_CLASS_TEXTURE,
+	CF_CLASS_VERTEX,
+	CF_CLASS_EXPORT,
+	CF_CLASS_OTHER
+};
 
+static enum cf_class get_cf_class(struct r600_bc_cf *cf)
+{
 	switch (cf->inst) {
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+		return CF_CLASS_ALU;
+
+	case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+		return CF_CLASS_TEXTURE;
+
+	case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+	case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+		return CF_CLASS_VERTEX;
+
+	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+		return CF_CLASS_EXPORT;
+
+	case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+	case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+	case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+	case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+	case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+		return CF_CLASS_OTHER;
+
+	default:
+		R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
+		return -EINVAL;
+	}
+}
+
+/* common for r600/r700 - eg in eg_asm.c */
+static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
+{
+	unsigned id = cf->id;
+
+	switch (get_cf_class(cf)) {
+	case CF_CLASS_ALU:
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
 			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
 			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
@@ -1089,16 +1132,14 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 					S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
 					S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
 		break;
-	case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
-	case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
-	case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+	case CF_CLASS_TEXTURE:
+	case CF_CLASS_VERTEX:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
 					S_SQ_CF_WORD1_BARRIER(1) |
 					S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
 		break;
-	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+	case CF_CLASS_EXPORT:
 		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
@@ -1111,15 +1152,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
 		break;
-	case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
-	case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
-	case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
-	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
-	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
-	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
-	case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
-	case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+	case CF_CLASS_OTHER:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
 					S_SQ_CF_WORD1_BARRIER(1) |
@@ -1134,6 +1167,445 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	return 0;
 }
 
+struct gpr_usage_range {
+	int	replacement;
+	int32_t	start;
+	int32_t	end;
+};
+
+struct gpr_usage {
+	unsigned		channels:4;
+	int32_t			first_write;
+	unsigned	        nranges;
+	struct gpr_usage_range  *ranges;
+};
+
+static struct gpr_usage_range* add_gpr_usage_range(struct gpr_usage *usage)
+{
+	usage->nranges++;
+	usage->ranges = realloc(usage->ranges, usage->nranges * sizeof(struct gpr_usage_range));
+	if (!usage->ranges)
+		return NULL;
+	return &usage->ranges[usage->nranges-1];
+}
+
+static void notice_gpr_read(struct gpr_usage *usage, uint32_t id, unsigned chan)
+{
+        usage->channels |= 1 << chan;
+        usage->first_write = -1;
+        if (!usage->nranges) {
+                add_gpr_usage_range(usage)->start = -1;
+        }
+        usage->ranges[usage->nranges-1].end = id;
+}
+
+static void notice_gpr_rel_read(struct gpr_usage usage[128], uint32_t id, unsigned chan)
+{
+	unsigned i;
+	for (i = 0; i < 128; ++i)
+		notice_gpr_read(&usage[i], id, chan);
+}
+
+static void notice_gpr_write(struct gpr_usage *usage, uint32_t id, unsigned chan)
+{
+	uint32_t start = usage->first_write != -1 ? usage->first_write : id;
+	usage->channels &= ~(1 << chan);
+	if (usage->channels) {
+		if (usage->first_write == -1)
+			usage->first_write = id;
+	} else if (!usage->nranges || usage->ranges[usage->nranges-1].start != start) {
+		usage->first_write = start;
+		struct gpr_usage_range* range = add_gpr_usage_range(usage);
+                range->start = start;
+                range->end = -1;
+        }
+}
+
+static void notice_gpr_rel_write(struct gpr_usage usage[128], uint32_t id, unsigned chan)
+{
+	/* we can't know which gpr is really used, so ignore it for now */
+}
+
+static void notice_alu_src_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], uint32_t id)
+{
+	unsigned src, num_src;
+
+	num_src = r600_bc_get_num_operands(alu);
+	for (src = 0; src < num_src; ++src) {
+		// constants don't matter
+		if (!is_gpr(alu->src[src].sel))
+			continue;
+
+		if (alu->src[src].rel)
+			notice_gpr_rel_read(usage, id, alu->src[src].chan);
+		else
+			notice_gpr_read(&usage[alu->src[src].sel], id, alu->src[src].chan);
+	}
+}
+
+static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage usage[128], uint32_t id)
+{
+	struct r600_bc_alu *alu;
+	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
+		if (alu->dst.write) {
+			if (alu->dst.rel)
+				notice_gpr_rel_write(usage, id, alu->dst.chan);
+			else
+				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan);
+		}
+
+		if (alu->last)
+			break;
+	}
+}
+
+static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128], uint32_t id)
+{
+	if (tex->src_rel) {
+                if (tex->src_sel_x < 4)
+			notice_gpr_rel_read(usage, id, tex->src_sel_x);
+		if (tex->src_sel_y < 4)
+			notice_gpr_rel_read(usage, id, tex->src_sel_y);
+		if (tex->src_sel_z < 4)
+			notice_gpr_rel_read(usage, id, tex->src_sel_z);
+		if (tex->src_sel_w < 4)
+			notice_gpr_rel_read(usage, id, tex->src_sel_w);
+        } else {
+		if (tex->src_sel_x < 4)
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_x);
+		if (tex->src_sel_y < 4)
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_y);
+		if (tex->src_sel_z < 4)
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_z);
+		if (tex->src_sel_w < 4)
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_w);
+	}
+	if (tex->dst_rel) {
+		if (tex->dst_sel_x != 7)
+			notice_gpr_rel_write(usage, id, 0);
+		if (tex->dst_sel_y != 7)
+			notice_gpr_rel_write(usage, id, 1);
+		if (tex->dst_sel_z != 7)
+			notice_gpr_rel_write(usage, id, 2);
+		if (tex->dst_sel_w != 7)
+			notice_gpr_rel_write(usage, id, 3);
+	} else {
+		if (tex->dst_sel_x != 7)
+			notice_gpr_write(&usage[tex->dst_gpr], id, 0);
+		if (tex->dst_sel_y != 7)
+			notice_gpr_write(&usage[tex->dst_gpr], id, 1);
+		if (tex->dst_sel_z != 7)
+			notice_gpr_write(&usage[tex->dst_gpr], id, 2);
+		if (tex->dst_sel_w != 7)
+			notice_gpr_write(&usage[tex->dst_gpr], id, 3);
+	}
+}
+
+static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128], uint32_t id)
+{
+	notice_gpr_read(&usage[vtx->src_gpr], id, vtx->src_sel_x);
+
+	if (vtx->dst_sel_x != 7)
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 0);
+	if (vtx->dst_sel_y != 7)
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 1);
+	if (vtx->dst_sel_z != 7)
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 2);
+	if (vtx->dst_sel_w != 7)
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 3);
+}
+
+static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], uint32_t id)
+{
+	//TODO handle other memory operations
+	if (cf->output.swizzle_x < 4)
+		notice_gpr_read(&usage[cf->output.gpr], id, cf->output.swizzle_x);
+	if (cf->output.swizzle_y < 4)
+		notice_gpr_read(&usage[cf->output.gpr], id, cf->output.swizzle_y);
+	if (cf->output.swizzle_z < 4)
+		notice_gpr_read(&usage[cf->output.gpr], id, cf->output.swizzle_z);
+	if (cf->output.swizzle_w < 4)
+		notice_gpr_read(&usage[cf->output.gpr], id, cf->output.swizzle_w);
+}
+
+static int is_in_range(struct gpr_usage_range* range, int32_t value)
+{
+	int32_t start = range->start == -1 ? 0 : range->start;
+	int32_t end = range->end;
+
+	return start <= value && value < end;
+}
+
+static int rate_replacement(struct gpr_usage *usage, struct gpr_usage_range* range)
+{
+	unsigned i;
+	uint32_t best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF;
+
+	for (i = 0; i < usage->nranges; ++i) {
+		if (usage->ranges[i].replacement != -1)
+			continue; /* ignore already remapped ranges */
+
+		if (is_in_range(&usage->ranges[i], range->start) ||
+			is_in_range(&usage->ranges[i], range->end))
+			return -1; /* forget it if usages overlap */
+
+		if (range->start >= usage->ranges[i].end)
+			best_start = MIN2(best_start, range->start - usage->ranges[i].end);
+
+		if (range->end != -1 && range->end <= usage->ranges[i].start)
+			best_end = MIN2(best_end, usage->ranges[i].start - range->end);
+	}
+	return best_start + best_end;
+}
+
+static void find_replacement(struct gpr_usage usage[128], unsigned current, struct gpr_usage_range *range)
+{
+	unsigned i;
+	int best_gpr = -1, best_rate = 0x7FFFFFFF;
+
+	if ((range->start & ~0xFF) == (range->end & ~0xFF)) {
+		/* register is just used inside one ALU clause */
+		/* try to use clause temporaries for it */
+		for (i = 127; i > 123; --i) {
+			int rate = rate_replacement(&usage[i], range);
+
+			if (rate == -1) /* can't be used because ranges overlap */
+				continue;
+
+			if (rate < best_rate) {
+				best_rate = rate;
+				best_gpr = i;
+
+				/* can't get better than this */
+				if (rate == 0)
+					break;
+			}
+		}
+	}
+
+	if (best_gpr == -1) {
+		for (i = 0; i < current; ++i) {
+			int rate = rate_replacement(&usage[i], range);
+
+			if (rate == -1) /* can't be used because ranges overlap */
+				continue;
+
+			if (rate < best_rate) {
+				best_rate = rate;
+				best_gpr = i;
+
+				/* can't get better than this */
+				if (rate == 0)
+					break;
+			}
+		}
+	}
+
+	range->replacement = best_gpr;
+	if (best_gpr != -1) {
+		struct gpr_usage_range *reservation = add_gpr_usage_range(&usage[best_gpr]);
+		reservation->replacement = -1;
+		reservation->start = range->start;
+		reservation->end = range->end;
+	}
+}
+
+static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t id)
+{
+	unsigned i;
+	for (i = 0; i < usage->nranges; ++i) {
+		struct gpr_usage_range* range = &usage->ranges[i];
+
+		if (range->start < id && id <= range->end)
+			return range;
+	}
+	return NULL;
+}
+
+static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int32_t id)
+{
+	unsigned i;
+	for (i = 0; i < usage->nranges; ++i) {
+		struct gpr_usage_range* range = &usage->ranges[i];
+		int32_t end = range->end;
+
+		if (range->start <= id && (id < end || end == -1))
+			return range;
+	}
+	return NULL;
+}
+
+static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], uint32_t id)
+{
+	struct gpr_usage_range *range;
+	unsigned src, num_src;
+
+	num_src = r600_bc_get_num_operands(alu);
+	for (src = 0; src < num_src; ++src) {
+		// constants don't matter
+		if (!is_gpr(alu->src[src].sel))
+			continue;
+
+		range = find_src_range(&usage[alu->src[src].sel], id);
+		if (range->replacement != -1)
+			alu->src[src].sel = range->replacement;
+	}
+
+	if (alu->dst.write) {
+		range = find_dst_range(&usage[alu->dst.sel], id);
+		assert(range);
+		if (range->replacement == alu->dst.sel) {
+			if (!alu->is_op3)
+				alu->dst.write = 0;
+			else
+				/*TODO: really check that register 123 is useable */
+				alu->dst.sel = 123;
+		} else if (range->replacement != -1) {
+			alu->dst.sel = range->replacement;
+		}
+	}
+}
+
+static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128], uint32_t id)
+{
+	struct gpr_usage_range *range;
+	range = find_src_range(&usage[tex->src_gpr], id);
+	if (range->replacement != -1)
+		tex->src_gpr = range->replacement;
+
+	range = find_dst_range(&usage[tex->dst_gpr], id);
+	if (range->replacement != -1)
+		tex->dst_gpr = range->replacement;
+}
+
+static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128], uint32_t id)
+{
+	struct gpr_usage_range *range;
+	range = find_src_range(&usage[vtx->src_gpr], id);
+	if (range->replacement != -1)
+		vtx->src_gpr = range->replacement;
+
+	range = find_dst_range(&usage[vtx->dst_gpr], id);
+	if (range->replacement != -1)
+		vtx->dst_gpr = range->replacement;
+}
+
+static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], uint32_t id)
+{
+	//TODO handle other memory operations
+	struct gpr_usage_range *range;
+	range = find_src_range(&usage[cf->output.gpr], id);
+	if (range->replacement != -1)
+		cf->output.gpr = range->replacement;
+}
+
+static void r600_bc_optimize_gprs(struct r600_bc *bc)
+{
+	struct r600_bc_cf *cf;
+	struct r600_bc_alu *first;
+	struct r600_bc_alu *alu;
+	struct r600_bc_vtx *vtx;
+	struct r600_bc_tex *tex;
+	struct gpr_usage usage[128];
+	uint32_t id;
+	unsigned i, j;
+
+	memset(&usage, 0, sizeof(usage));
+	for (i = 0; i < 128; ++i)
+		usage[i].first_write = -1;
+
+	/* first gather some information about the gpr usage */
+	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+	        id = cf->id << 8;
+		switch (get_cf_class(cf)) {
+		case CF_CLASS_ALU:
+			first = NULL;
+			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+				if (!first)
+					first = alu;
+				notice_alu_src_gprs(alu, usage, id);
+				if (alu->last) {
+					notice_alu_dst_gprs(first, usage, id);
+					first = NULL;
+					++id;
+				}
+			}
+			break;
+		case CF_CLASS_TEXTURE:
+			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+				notice_tex_gprs(tex, usage, id++);
+			}
+			break;
+		case CF_CLASS_VERTEX:
+			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+				notice_vtx_gprs(vtx, usage, id++);
+			}
+			break;
+		case CF_CLASS_EXPORT:
+			notice_export_gprs(cf, usage, id);
+			break;
+		case CF_CLASS_OTHER:
+			// TODO implement conditional and loop handling
+			if (cf->inst != V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS)
+				goto out;
+			break;
+		}
+	}
+
+	/* try to optimize gpr usage */
+	for (i = 0; i < 124; ++i) {
+		for (j = 0; j < usage[i].nranges; ++j) {
+			struct gpr_usage_range *range = &usage[i].ranges[j];
+			if (range->start == -1)
+				range->replacement = -1;
+			else if (range->end == -1)
+				range->replacement = i;
+			else
+				find_replacement(usage, i, range);
+
+			if (range->replacement == -1)
+				bc->ngpr = i;
+			else if (range->replacement < i && range->replacement > bc->ngpr)
+				bc->ngpr = range->replacement;
+		}
+	}
+	bc->ngpr++;
+
+	/* apply the changes */
+	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+	        id = cf->id << 8;
+		switch (get_cf_class(cf)) {
+		case CF_CLASS_ALU:
+			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+				replace_alu_gprs(alu, usage, id);
+				if (alu->last)
+					++id;
+			}
+			break;
+		case CF_CLASS_TEXTURE:
+			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+				replace_tex_gprs(tex, usage, id++);
+			}
+			break;
+		case CF_CLASS_VERTEX:
+			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+				replace_vtx_gprs(vtx, usage, id++);
+			}
+			break;
+		case CF_CLASS_EXPORT:
+			replace_export_gprs(cf, usage, id);
+			break;
+		case CF_CLASS_OTHER:
+			break;
+		}
+	}
+
+out:
+	for (i = 0; i < 128; ++i) {
+		free(usage[i].ranges);
+	}
+}
+
 int r600_bc_build(struct r600_bc *bc)
 {
 	struct r600_bc_cf *cf;
@@ -1151,15 +1623,14 @@ int r600_bc_build(struct r600_bc *bc)
 		bc->nstack = 1;
 	}
 
+	r600_bc_optimize_gprs(bc);
+
 	/* first path compute addr of each CF block */
 	/* addr start after all the CF instructions */
 	addr = bc->cf_last->id + 2;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
-		switch (cf->inst) {
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+		switch (get_cf_class(cf)) {
+		case CF_CLASS_ALU:
 			nliteral = 0;
 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
 				r = r600_bc_alu_nliterals(alu, literal, &nliteral);
@@ -1171,27 +1642,16 @@ int r600_bc_build(struct r600_bc *bc)
 				}
 			}
 			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+		case CF_CLASS_TEXTURE:
+		case CF_CLASS_VERTEX:
 			/* fetch node need to be 16 bytes aligned*/
 			addr += 3;
 			addr &= 0xFFFFFFFCUL;
 			break;
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
 			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
-		case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
-		case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+		case CF_CLASS_EXPORT:
+			break;
+		case CF_CLASS_OTHER:
 			break;
 		default:
 			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -1213,11 +1673,8 @@ int r600_bc_build(struct r600_bc *bc)
 			r = r600_bc_cf_build(bc, cf);
 		if (r)
 			return r;
-		switch (cf->inst) {
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+		switch (get_cf_class(cf)) {
+		case CF_CLASS_ALU:
 			nliteral = 0;
 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
 				r = r600_bc_alu_nliterals(alu, literal, &nliteral);
@@ -1247,8 +1704,7 @@ int r600_bc_build(struct r600_bc *bc)
 				}
 			}
 			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+		case CF_CLASS_VERTEX:
 			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
 				r = r600_bc_vtx_build(bc, vtx, addr);
 				if (r)
@@ -1256,7 +1712,7 @@ int r600_bc_build(struct r600_bc *bc)
 				addr += 4;
 			}
 			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+		case CF_CLASS_TEXTURE:
 			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
 				r = r600_bc_tex_build(bc, tex, addr);
 				if (r)
@@ -1264,19 +1720,8 @@ int r600_bc_build(struct r600_bc *bc)
 				addr += 4;
 			}
 			break;
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
-		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
-		case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+		case CF_CLASS_EXPORT:
+		case CF_CLASS_OTHER:
 			break;
 		default:
 			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -1352,11 +1797,8 @@ void r600_bc_dump(struct r600_bc *bc)
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		id = cf->id;
 
-		switch (cf->inst) {
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+		switch (get_cf_class(cf)) {
+		case CF_CLASS_ALU:
 			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
 			fprintf(stderr, "ADDR:%d ", cf->addr);
 			fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache0_mode);
@@ -1370,9 +1812,8 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache1_addr);
 			fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
 			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+		case CF_CLASS_TEXTURE:
+		case CF_CLASS_VERTEX:
 			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
 			fprintf(stderr, "ADDR:%d\n", cf->addr);
 			id++;
@@ -1380,8 +1821,7 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "INST:%d ", cf->inst);
 			fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
 			break;
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+		case CF_CLASS_EXPORT:
 			fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
 			fprintf(stderr, "GPR:%X ", cf->output.gpr);
 			fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
@@ -1398,15 +1838,8 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "INST:%d ", cf->output.inst);
 			fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
 			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
-		case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
-		case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+			break;
+		case CF_CLASS_OTHER:
 			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
 			fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
 			id++;
-- 
cgit v1.2.3


From f23dce053282eb1f1ad50041cf87e2e542b63e34 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 27 Dec 2010 20:50:05 +0100
Subject: r600g: fix gpr usage intersection and add conditiona code handling

---
 src/gallium/drivers/r600/r600_asm.c | 120 ++++++++++++++++++++++--------------
 1 file changed, 73 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 23c847cb436..41882725ea9 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -194,20 +194,10 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 	return 0;
 }
 
-/* alu instructions that can ony exits once per group */
-static int is_alu_once_inst(struct r600_bc_alu *alu)
+/* alu predicate instructions */
+static int is_alu_pred_inst(struct r600_bc_alu *alu)
 {
 	return !alu->is_op3 && (
-		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
-		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
-		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
-		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
-		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
-		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
-		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
-		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
-		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
-		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
 		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
 		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
 		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
@@ -234,6 +224,29 @@ static int is_alu_once_inst(struct r600_bc_alu *alu)
 		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
 }
 
+/* alu kill instructions */
+static int is_alu_kill_inst(struct r600_bc_alu *alu)
+{
+	return !alu->is_op3 && (
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+		alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT);
+}
+
+/* alu instructions that can only exist once per group */
+static int is_alu_once_inst(struct r600_bc_alu *alu)
+{
+	return is_alu_kill_inst(alu) ||
+		is_alu_pred_inst(alu);
+}
+
 static int is_alu_reduction_inst(struct r600_bc_alu *alu)
 {
 	return !alu->is_op3 && (
@@ -822,19 +835,13 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 	if (alu->last && (bc->cf_last->ndw >> 1) >= 120) {
 		bc->force_add_cf = 1;
 	}
-	/* number of gpr == the last gpr used in any alu */
+	/* replace special constants */
 	for (i = 0; i < 3; i++) {
-		if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
-			bc->ngpr = alu->src[i].sel + 1;
-		}
 		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
 			r600_bc_special_constants(
 				nalu->src[i].value[nalu->src[i].chan],
 				&nalu->src[i].sel, &nalu->src[i].neg);
 	}
-	if (alu->dst.sel >= bc->ngpr) {
-		bc->ngpr = alu->dst.sel + 1;
-	}
 	LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
 	/* each alu use 2 dwords */
 	bc->cf_last->ndw += 2;
@@ -1206,14 +1213,14 @@ static void notice_gpr_rel_read(struct gpr_usage usage[128], uint32_t id, unsign
 		notice_gpr_read(&usage[i], id, chan);
 }
 
-static void notice_gpr_write(struct gpr_usage *usage, uint32_t id, unsigned chan)
+static void notice_gpr_write(struct gpr_usage *usage, uint32_t id, unsigned chan, int predicate)
 {
 	uint32_t start = usage->first_write != -1 ? usage->first_write : id;
 	usage->channels &= ~(1 << chan);
 	if (usage->channels) {
 		if (usage->first_write == -1)
 			usage->first_write = id;
-	} else if (!usage->nranges || usage->ranges[usage->nranges-1].start != start) {
+	} else if (!usage->nranges || (usage->ranges[usage->nranges-1].start != start && !predicate)) {
 		usage->first_write = start;
 		struct gpr_usage_range* range = add_gpr_usage_range(usage);
                 range->start = start;
@@ -1243,7 +1250,8 @@ static void notice_alu_src_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[
 	}
 }
 
-static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage usage[128], uint32_t id)
+static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage usage[128],
+				uint32_t id, int predicate)
 {
 	struct r600_bc_alu *alu;
 	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
@@ -1251,7 +1259,7 @@ static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage
 			if (alu->dst.rel)
 				notice_gpr_rel_write(usage, id, alu->dst.chan);
 			else
-				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan);
+				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan, predicate);
 		}
 
 		if (alu->last)
@@ -1259,7 +1267,8 @@ static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage
 	}
 }
 
-static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128], uint32_t id)
+static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128],
+				uint32_t id, int predicate)
 {
 	if (tex->src_rel) {
                 if (tex->src_sel_x < 4)
@@ -1291,28 +1300,29 @@ static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128]
 			notice_gpr_rel_write(usage, id, 3);
 	} else {
 		if (tex->dst_sel_x != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 0);
+			notice_gpr_write(&usage[tex->dst_gpr], id, 0, predicate);
 		if (tex->dst_sel_y != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 1);
+			notice_gpr_write(&usage[tex->dst_gpr], id, 1, predicate);
 		if (tex->dst_sel_z != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 2);
+			notice_gpr_write(&usage[tex->dst_gpr], id, 2, predicate);
 		if (tex->dst_sel_w != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 3);
+			notice_gpr_write(&usage[tex->dst_gpr], id, 3, predicate);
 	}
 }
 
-static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128], uint32_t id)
+static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
+				uint32_t id, int predicate)
 {
 	notice_gpr_read(&usage[vtx->src_gpr], id, vtx->src_sel_x);
 
 	if (vtx->dst_sel_x != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 0);
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 0, predicate);
 	if (vtx->dst_sel_y != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 1);
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 1, predicate);
 	if (vtx->dst_sel_z != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 2);
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 2, predicate);
 	if (vtx->dst_sel_w != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 3);
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 3, predicate);
 }
 
 static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], uint32_t id)
@@ -1328,12 +1338,9 @@ static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128
 		notice_gpr_read(&usage[cf->output.gpr], id, cf->output.swizzle_w);
 }
 
-static int is_in_range(struct gpr_usage_range* range, int32_t value)
+static int is_intersection(struct gpr_usage_range* a, struct gpr_usage_range* b)
 {
-	int32_t start = range->start == -1 ? 0 : range->start;
-	int32_t end = range->end;
-
-	return start <= value && value < end;
+	return a->start <= b->end && b->start < a->end;
 }
 
 static int rate_replacement(struct gpr_usage *usage, struct gpr_usage_range* range)
@@ -1345,8 +1352,7 @@ static int rate_replacement(struct gpr_usage *usage, struct gpr_usage_range* ran
 		if (usage->ranges[i].replacement != -1)
 			continue; /* ignore already remapped ranges */
 
-		if (is_in_range(&usage->ranges[i], range->start) ||
-			is_in_range(&usage->ranges[i], range->end))
+		if (is_intersection(&usage->ranges[i], range))
 			return -1; /* forget it if usages overlap */
 
 		if (range->start >= usage->ranges[i].end)
@@ -1508,7 +1514,7 @@ static void r600_bc_optimize_gprs(struct r600_bc *bc)
 	struct r600_bc_tex *tex;
 	struct gpr_usage usage[128];
 	uint32_t id;
-	unsigned i, j;
+	unsigned i, j, stack = 0, predicate;
 
 	memset(&usage, 0, sizeof(usage));
 	for (i = 0; i < 128; ++i)
@@ -1519,38 +1525,58 @@ static void r600_bc_optimize_gprs(struct r600_bc *bc)
 	        id = cf->id << 8;
 		switch (get_cf_class(cf)) {
 		case CF_CLASS_ALU:
+			predicate = 0;
 			first = NULL;
 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
 				if (!first)
 					first = alu;
 				notice_alu_src_gprs(alu, usage, id);
 				if (alu->last) {
-					notice_alu_dst_gprs(first, usage, id);
+					notice_alu_dst_gprs(first, usage, id, predicate || stack > 0);
 					first = NULL;
 					++id;
 				}
+				if (is_alu_pred_inst(alu))
+					predicate++;
 			}
+			if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3)
+				stack += predicate;
+			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)
+				stack -= 1;
+			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)
+				stack -= 2;
 			break;
 		case CF_CLASS_TEXTURE:
 			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
-				notice_tex_gprs(tex, usage, id++);
+				notice_tex_gprs(tex, usage, id++, stack > 0);
 			}
 			break;
 		case CF_CLASS_VERTEX:
 			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
-				notice_vtx_gprs(vtx, usage, id++);
+				notice_vtx_gprs(vtx, usage, id++, stack > 0);
 			}
 			break;
 		case CF_CLASS_EXPORT:
 			notice_export_gprs(cf, usage, id);
 			break;
 		case CF_CLASS_OTHER:
-			// TODO implement conditional and loop handling
-			if (cf->inst != V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS)
+			switch (cf->inst) {
+			case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+			case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+			case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+				break;
+
+			case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+				stack -= cf->pop_count;
+				break;
+
+			default:
+				// TODO implement loop handling
 				goto out;
-			break;
+			}
 		}
 	}
+	assert(stack == 0);
 
 	/* try to optimize gpr usage */
 	for (i = 0; i < 124; ++i) {
-- 
cgit v1.2.3


From c099fcd28a30a019d02f8a56dab185c5dd1a9d35 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 28 Dec 2010 13:09:54 +0100
Subject: r600g: optimize unneeded alu moves

---
 src/gallium/drivers/r600/r600_asm.c | 208 +++++++++++++++++++++++++++---------
 1 file changed, 157 insertions(+), 51 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 41882725ea9..7908eee5f79 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -35,6 +35,9 @@
 #define NUM_OF_CYCLES 3
 #define NUM_OF_COMPONENTS 4
 
+#define PREV_ALU(alu) LIST_ENTRY(struct r600_bc_alu, alu->list.prev, list)
+#define NEXT_ALU(alu) LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)
+
 static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
 {
 	if(alu->is_op3)
@@ -182,6 +185,19 @@ static int r600_bc_add_cf(struct r600_bc *bc)
 	return 0;
 }
 
+static void r600_bc_remove_cf(struct r600_bc *bc, struct r600_bc_cf *cf)
+{
+	struct r600_bc_cf *other;
+	LIST_FOR_EACH_ENTRY(other, &bc->cf, list) {
+		if (other->id > cf->id)
+			other->id -= 2;
+		if (other->cf_addr > cf->id)
+			other->cf_addr -= 2;
+	}
+	LIST_DEL(&cf->list);
+	free(cf);
+}
+
 int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 {
 	int r;
@@ -321,7 +337,7 @@ static int assign_alu_units(struct r600_bc_alu *alu_first, struct r600_bc_alu *a
 	for (i = 0; i < 5; i++)
 		assignment[i] = NULL;
 
-	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
+	for (alu = alu_first; alu; alu = NEXT_ALU(alu)) {
 		chan = alu->dst.chan;
 		if (is_alu_trans_unit_inst(alu))
 			trans = 1;
@@ -884,6 +900,16 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
 	return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
 }
 
+static void r600_bc_remove_alu(struct r600_bc_cf *cf, struct r600_bc_alu *alu)
+{
+	if (alu->last && alu->list.prev != &cf->alu) {
+		PREV_ALU(alu)->last = 1;
+	}
+	LIST_DEL(&alu->list);
+	free(alu);
+	cf->ndw -= 2;
+}
+
 int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
 {
 	struct r600_bc_vtx *nvtx = r600_bc_vtx();
@@ -1213,7 +1239,8 @@ static void notice_gpr_rel_read(struct gpr_usage usage[128], uint32_t id, unsign
 		notice_gpr_read(&usage[i], id, chan);
 }
 
-static void notice_gpr_write(struct gpr_usage *usage, uint32_t id, unsigned chan, int predicate)
+static void notice_gpr_write(struct gpr_usage *usage, uint32_t id, unsigned chan,
+				int predicate, int prefered_replacement)
 {
 	uint32_t start = usage->first_write != -1 ? usage->first_write : id;
 	usage->channels &= ~(1 << chan);
@@ -1223,8 +1250,11 @@ static void notice_gpr_write(struct gpr_usage *usage, uint32_t id, unsigned chan
 	} else if (!usage->nranges || (usage->ranges[usage->nranges-1].start != start && !predicate)) {
 		usage->first_write = start;
 		struct gpr_usage_range* range = add_gpr_usage_range(usage);
+		range->replacement = prefered_replacement;
                 range->start = start;
                 range->end = -1;
+        } else if (usage->ranges[usage->nranges-1].start == start && prefered_replacement != -1) {
+        	usage->ranges[usage->nranges-1].replacement = prefered_replacement;
         }
 }
 
@@ -1258,8 +1288,11 @@ static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage
 		if (alu->dst.write) {
 			if (alu->dst.rel)
 				notice_gpr_rel_write(usage, id, alu->dst.chan);
+			else if (alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV && is_gpr(alu->src[0].sel))
+				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan,
+						predicate, alu->src[0].sel);
 			else
-				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan, predicate);
+				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan, predicate, -1);
 		}
 
 		if (alu->last)
@@ -1300,13 +1333,13 @@ static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128]
 			notice_gpr_rel_write(usage, id, 3);
 	} else {
 		if (tex->dst_sel_x != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 0, predicate);
+			notice_gpr_write(&usage[tex->dst_gpr], id, 0, predicate, -1);
 		if (tex->dst_sel_y != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 1, predicate);
+			notice_gpr_write(&usage[tex->dst_gpr], id, 1, predicate, -1);
 		if (tex->dst_sel_z != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 2, predicate);
+			notice_gpr_write(&usage[tex->dst_gpr], id, 2, predicate, -1);
 		if (tex->dst_sel_w != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 3, predicate);
+			notice_gpr_write(&usage[tex->dst_gpr], id, 3, predicate, -1);
 	}
 }
 
@@ -1316,13 +1349,13 @@ static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128]
 	notice_gpr_read(&usage[vtx->src_gpr], id, vtx->src_sel_x);
 
 	if (vtx->dst_sel_x != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 0, predicate);
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 0, predicate, -1);
 	if (vtx->dst_sel_y != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 1, predicate);
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 1, predicate, -1);
 	if (vtx->dst_sel_z != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 2, predicate);
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 2, predicate, -1);
 	if (vtx->dst_sel_w != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 3, predicate);
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 3, predicate, -1);
 }
 
 static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], uint32_t id)
@@ -1338,6 +1371,33 @@ static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128
 		notice_gpr_read(&usage[cf->output.gpr], id, cf->output.swizzle_w);
 }
 
+static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t id)
+{
+	unsigned i;
+	for (i = 0; i < usage->nranges; ++i) {
+		struct gpr_usage_range* range = &usage->ranges[i];
+
+		if (range->start < id && id <= range->end)
+			return range;
+	}
+	assert(0); /* should not happen */
+	return NULL;
+}
+
+static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int32_t id)
+{
+	unsigned i;
+	for (i = 0; i < usage->nranges; ++i) {
+		struct gpr_usage_range* range = &usage->ranges[i];
+		int32_t end = range->end;
+
+		if (range->start <= id && (id < end || end == -1))
+			return range;
+	}
+	assert(0); /* should not happen */
+	return NULL;
+}
+
 static int is_intersection(struct gpr_usage_range* a, struct gpr_usage_range* b)
 {
 	return a->start <= b->end && b->start < a->end;
@@ -1369,7 +1429,23 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, stru
 	unsigned i;
 	int best_gpr = -1, best_rate = 0x7FFFFFFF;
 
-	if ((range->start & ~0xFF) == (range->end & ~0xFF)) {
+	if (range->replacement != -1 && range->replacement <= current) {
+		struct gpr_usage_range *other = find_src_range(&usage[range->replacement], range->start);
+		if (other->replacement != -1)
+			range->replacement = other->replacement;
+	}
+
+	if (range->replacement != -1 && range->replacement < current) {
+		int rate = rate_replacement(&usage[range->replacement], range);
+
+		/* check if prefered replacement can be used */
+		if (rate != -1) {
+			best_rate = rate;
+			best_gpr = range->replacement;
+		}
+	}
+
+	if (best_gpr == -1 && (range->start & ~0xFF) == (range->end & ~0xFF)) {
 		/* register is just used inside one ALU clause */
 		/* try to use clause temporaryis for it */
 		for (i = 127; i > 123; --i) {
@@ -1416,31 +1492,6 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, stru
 	}
 }
 
-static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t id)
-{
-	unsigned i;
-	for (i = 0; i < usage->nranges; ++i) {
-		struct gpr_usage_range* range = &usage->ranges[i];
-
-		if (range->start < id && id <= range->end)
-			return range;
-	}
-	return NULL;
-}
-
-static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int32_t id)
-{
-	unsigned i;
-	for (i = 0; i < usage->nranges; ++i) {
-		struct gpr_usage_range* range = &usage->ranges[i];
-		int32_t end = range->end;
-
-		if (range->start <= id && (id < end || end == -1))
-			return range;
-	}
-	return NULL;
-}
-
 static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], uint32_t id)
 {
 	struct gpr_usage_range *range;
@@ -1459,7 +1510,6 @@ static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128
 
 	if (alu->dst.write) {
 		range = find_dst_range(&usage[alu->dst.sel], id);
-		assert(range);
 		if (range->replacement == alu->dst.sel) {
 			if (!alu->is_op3)
 				alu->dst.write = 0;
@@ -1505,10 +1555,57 @@ static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[12
 		cf->output.gpr = range->replacement;
 }
 
-static void r600_bc_optimize_gprs(struct r600_bc *bc)
+static void optimize_alu_inst(struct r600_bc_cf *cf, struct r600_bc_alu *alu)
 {
-	struct r600_bc_cf *cf;
-	struct r600_bc_alu *first;
+	struct r600_bc_alu *alu_next;
+	unsigned chan;
+	unsigned src, num_src;
+
+	/* check if a MOV could be optimized away */
+	if (alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV) {
+
+		/* destination equals source? */
+		if (alu->dst.sel != alu->src[0].sel ||
+			alu->dst.chan != alu->src[0].chan)
+			return;
+
+		/* any special handling for the source? */
+		if (alu->src[0].rel || alu->src[0].neg || alu->src[0].abs)
+			return;
+
+		/* any special handling for destination? */
+		if (alu->dst.rel || alu->dst.clamp)
+			return;
+
+		/* ok find next instruction group and check if ps/pv is used */
+		for (alu_next = alu; !alu_next->last; alu_next = NEXT_ALU(alu_next));
+
+		if (alu_next->list.next != &cf->alu) {
+			chan = is_alu_reduction_inst(alu) ? 0 : alu->dst.chan;
+			for (alu_next = NEXT_ALU(alu_next); alu_next; alu_next = NEXT_ALU(alu_next)) {
+				num_src = r600_bc_get_num_operands(alu_next);
+				for (src = 0; src < num_src; ++src) {
+					if (alu_next->src[src].sel == V_SQ_ALU_SRC_PV &&
+						alu_next->src[src].chan == chan)
+						return;
+
+					if (alu_next->src[src].sel == V_SQ_ALU_SRC_PS)
+						return;
+				}
+
+				if (alu_next->last)
+					break;
+			}
+		}
+
+		r600_bc_remove_alu(cf, alu);
+	}
+}
+
+static void r600_bc_optimize(struct r600_bc *bc)
+{
+	struct r600_bc_cf *cf, *next_cf;
+	struct r600_bc_alu *first, *next_alu;
 	struct r600_bc_alu *alu;
 	struct r600_bc_vtx *vtx;
 	struct r600_bc_tex *tex;
@@ -1521,8 +1618,10 @@ static void r600_bc_optimize_gprs(struct r600_bc *bc)
 		usage[i].first_write = -1;
 
 	/* first gather some informations about the gpr usage */
+	id = 0;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
-	        id = cf->id << 8;
+	        id += 0x100;
+	        id &= ~0xFF;
 		switch (get_cf_class(cf)) {
 		case CF_CLASS_ALU:
 			predicate = 0;
@@ -1598,14 +1697,21 @@ static void r600_bc_optimize_gprs(struct r600_bc *bc)
 	bc->ngpr++;
 
 	/* apply the changes */
-	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
-	        id = cf->id << 8;
+	id = 0;
+	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
+	        id += 0x100;
+	        id &= ~0xFF;
 		switch (get_cf_class(cf)) {
 		case CF_CLASS_ALU:
-			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+			LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
 				replace_alu_gprs(alu, usage, id);
 				if (alu->last)
 					++id;
+
+				optimize_alu_inst(cf, alu);
+			}
+			if (LIST_IS_EMPTY(&cf->alu)) {
+				r600_bc_remove_cf(bc, cf);
 			}
 			break;
 		case CF_CLASS_TEXTURE:
@@ -1649,7 +1755,7 @@ int r600_bc_build(struct r600_bc *bc)
 		bc->nstack = 1;
 	}
 
-	r600_bc_optimize_gprs(bc);
+	r600_bc_optimize(bc);
 
 	/* first path compute addr of each CF block */
 	/* addr start after all the CF instructions */
@@ -1817,7 +1923,7 @@ void r600_bc_dump(struct r600_bc *bc)
 		chip = '6';
 		break;
 	}
-	fprintf(stderr, "bytecode %d dw -----------------------\n", bc->ndw);
+	fprintf(stderr, "bytecode %d dw -- %d gprs -----------------------\n", bc->ndw, bc->ngpr);
 	fprintf(stderr, "     %c\n", chip);
 
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
@@ -1826,7 +1932,7 @@ void r600_bc_dump(struct r600_bc *bc)
 		switch (get_cf_class(cf)) {
 		case CF_CLASS_ALU:
 			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
-			fprintf(stderr, "ADDR:%d ", cf->addr);
+			fprintf(stderr, "ADDR:%04d ", cf->addr);
 			fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache0_mode);
 			fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache0_bank);
 			fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache1_bank);
@@ -1841,7 +1947,7 @@ void r600_bc_dump(struct r600_bc *bc)
 		case CF_CLASS_TEXTURE:
 		case CF_CLASS_VERTEX:
 			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
-			fprintf(stderr, "ADDR:%d\n", cf->addr);
+			fprintf(stderr, "ADDR:%04d\n", cf->addr);
 			id++;
 			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
 			fprintf(stderr, "INST:%d ", cf->inst);
@@ -1849,7 +1955,7 @@ void r600_bc_dump(struct r600_bc *bc)
 			break;
 		case CF_CLASS_EXPORT:
 			fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
-			fprintf(stderr, "GPR:%X ", cf->output.gpr);
+			fprintf(stderr, "GPR:%d ", cf->output.gpr);
 			fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
 			fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
 			fprintf(stderr, "TYPE:%X\n", cf->output.type);
@@ -1867,7 +1973,7 @@ void r600_bc_dump(struct r600_bc *bc)
 			break;
 		case CF_CLASS_OTHER:
 			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
-			fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
+			fprintf(stderr, "ADDR:%04d\n", cf->cf_addr);
 			id++;
 			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
 			fprintf(stderr, "INST:%d ", cf->inst);
-- 
cgit v1.2.3


From 3a49b567cf7a8e671e90831dd131c0408ffb6252 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 28 Dec 2010 23:45:46 +0100
Subject: r600g: rearrange exports

---
 src/gallium/drivers/r600/eg_asm.c      |  18 ++--
 src/gallium/drivers/r600/r600_asm.c    | 158 ++++++++++++++++++++++++---------
 src/gallium/drivers/r600/r600_asm.h    |   2 -
 src/gallium/drivers/r600/r600_shader.c |  17 +---
 4 files changed, 130 insertions(+), 65 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index b79875c7c75..ff1c2e5b9e1 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -32,10 +32,12 @@
 int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 {
 	unsigned id = cf->id;
+	unsigned end_of_program = bc->cf.prev == &cf->list;
 
 	switch (cf->inst) {
 	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
 	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+		assert(!end_of_program);
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
 			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
 			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
@@ -51,8 +53,9 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-					S_SQ_CF_WORD1_BARRIER(1) |
-					S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+			S_SQ_CF_WORD1_BARRIER(1) |
+			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) |
+			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
 		break;
 	case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
 	case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
@@ -65,8 +68,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end_of_program);
 		break;
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
@@ -79,9 +82,10 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-					S_SQ_CF_WORD1_BARRIER(1) |
-					S_SQ_CF_WORD1_COND(cf->cond) |
-					S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+			S_SQ_CF_WORD1_BARRIER(1) |
+			S_SQ_CF_WORD1_COND(cf->cond) |
+			S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
 
 		break;
 	default:
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 7908eee5f79..f455080ce8f 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -198,6 +198,26 @@ static void r600_bc_remove_cf(struct r600_bc *bc, struct r600_bc_cf *cf)
 	free(cf);
 }
 
+static void r600_bc_move_cf(struct r600_bc *bc, struct r600_bc_cf *cf, struct r600_bc_cf *prev)
+{
+	unsigned new_id = prev->id + 2;
+	struct r600_bc_cf *other;
+
+	LIST_DEL(&cf->list);
+	LIST_FOR_EACH_ENTRY(other, &bc->cf, list) {
+		if (other->id > cf->id)
+			other->id -= 2;
+		if (other->id >= new_id)
+			other->id += 2;
+		if (other->cf_addr > cf->id)
+			other->cf_addr -= 2;
+		if (other->cf_addr >= new_id)
+			other->cf_addr += 2;
+	}
+	cf->id = new_id;
+	LIST_ADD(&cf->list, &prev->list);
+}
+
 int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 {
 	int r;
@@ -205,7 +225,7 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 	r = r600_bc_add_cf(bc);
 	if (r)
 		return r;
-	bc->cf_last->inst = output->inst;
+	bc->cf_last->inst = BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
 	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output));
 	return 0;
 }
@@ -1149,9 +1169,11 @@ static enum cf_class get_cf_class(struct r600_bc_cf *cf)
 static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 {
 	unsigned id = cf->id;
+	unsigned end_of_program = bc->cf.prev == &cf->list;
 
 	switch (get_cf_class(cf)) {
 	case CF_CLASS_ALU:
+		assert(!end_of_program);
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
 			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
 			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
@@ -1169,8 +1191,9 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	case CF_CLASS_VERTEX:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-					S_SQ_CF_WORD1_BARRIER(1) |
-					S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
+			S_SQ_CF_WORD1_BARRIER(1) |
+			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) |
+			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
 		break;
 	case CF_CLASS_EXPORT:
 		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
@@ -1182,15 +1205,16 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end_of_program);
 		break;
 	case CF_CLASS_OTHER:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-					S_SQ_CF_WORD1_BARRIER(1) |
-			                S_SQ_CF_WORD1_COND(cf->cond) |
-			                S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+			S_SQ_CF_WORD1_BARRIER(1) |
+			S_SQ_CF_WORD1_COND(cf->cond) |
+			S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
 
 		break;
 	default:
@@ -1209,6 +1233,7 @@ struct gpr_usage_range {
 struct gpr_usage {
 	unsigned		channels:4;
 	int32_t			first_write;
+	int32_t			last_write;
 	unsigned	        nranges;
 	struct gpr_usage_range  *ranges;
 };
@@ -1222,28 +1247,33 @@ static struct gpr_usage_range* add_gpr_usage_range(struct gpr_usage *usage)
 	return &usage->ranges[usage->nranges-1];
 }
 
-static void notice_gpr_read(struct gpr_usage *usage, uint32_t id, unsigned chan)
+static void notice_gpr_read(struct gpr_usage *usage, int32_t id, unsigned chan)
 {
         usage->channels |= 1 << chan;
         usage->first_write = -1;
         if (!usage->nranges) {
-                add_gpr_usage_range(usage)->start = -1;
+        	struct gpr_usage_range* range = add_gpr_usage_range(usage);
+        	range->replacement = -1;
+                range->start = -1;
+                range->end = -1;
         }
-        usage->ranges[usage->nranges-1].end = id;
+        if (usage->ranges[usage->nranges-1].end < id)
+		usage->ranges[usage->nranges-1].end = id;
 }
 
-static void notice_gpr_rel_read(struct gpr_usage usage[128], uint32_t id, unsigned chan)
+static void notice_gpr_rel_read(struct gpr_usage usage[128], int32_t id, unsigned chan)
 {
 	unsigned i;
 	for (i = 0; i < 128; ++i)
 		notice_gpr_read(&usage[i], id, chan);
 }
 
-static void notice_gpr_write(struct gpr_usage *usage, uint32_t id, unsigned chan,
+static void notice_gpr_write(struct gpr_usage *usage, int32_t id, unsigned chan,
 				int predicate, int prefered_replacement)
 {
-	uint32_t start = usage->first_write != -1 ? usage->first_write : id;
+	int32_t start = usage->first_write != -1 ? usage->first_write : id;
 	usage->channels &= ~(1 << chan);
+	usage->last_write = id;
 	if (usage->channels) {
 		if (usage->first_write == -1)
 			usage->first_write = id;
@@ -1258,12 +1288,12 @@ static void notice_gpr_write(struct gpr_usage *usage, uint32_t id, unsigned chan
         }
 }
 
-static void notice_gpr_rel_write(struct gpr_usage usage[128], uint32_t id, unsigned chan)
+static void notice_gpr_rel_write(struct gpr_usage usage[128], int32_t id, unsigned chan)
 {
 	/* we can't know wich gpr is really used, so ignore it for now*/
 }
 
-static void notice_alu_src_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], uint32_t id)
+static void notice_alu_src_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], int32_t id)
 {
 	unsigned src, num_src;
 
@@ -1281,7 +1311,7 @@ static void notice_alu_src_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[
 }
 
 static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage usage[128],
-				uint32_t id, int predicate)
+				int32_t id, int predicate)
 {
 	struct r600_bc_alu *alu;
 	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
@@ -1301,7 +1331,7 @@ static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage
 }
 
 static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128],
-				uint32_t id, int predicate)
+				int32_t id, int predicate)
 {
 	if (tex->src_rel) {
                 if (tex->src_sel_x < 4)
@@ -1344,7 +1374,7 @@ static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128]
 }
 
 static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
-				uint32_t id, int predicate)
+				int32_t id, int predicate)
 {
 	notice_gpr_read(&usage[vtx->src_gpr], id, vtx->src_sel_x);
 
@@ -1358,17 +1388,27 @@ static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128]
 		notice_gpr_write(&usage[vtx->dst_gpr], id, 3, predicate, -1);
 }
 
-static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], uint32_t id)
+static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
+				struct r600_bc_cf *export_cf[128], int32_t export_remap[128],
+				int32_t id)
 {
 	//TODO handle other memory operations
+	struct gpr_usage *output = &usage[cf->output.gpr];
+	int32_t last_write = (output->last_write + 0x100) & ~0xFF;
+
+	if (last_write != id && !export_cf[cf->output.gpr]) {
+		export_cf[cf->output.gpr] = cf;
+		export_remap[cf->output.gpr] = last_write;
+		id = last_write;
+	}
 	if (cf->output.swizzle_x < 4)
-		notice_gpr_read(&usage[cf->output.gpr], id, cf->output.swizzle_x);
+		notice_gpr_read(output, id, cf->output.swizzle_x);
 	if (cf->output.swizzle_y < 4)
-		notice_gpr_read(&usage[cf->output.gpr], id, cf->output.swizzle_y);
+		notice_gpr_read(output, id, cf->output.swizzle_y);
 	if (cf->output.swizzle_z < 4)
-		notice_gpr_read(&usage[cf->output.gpr], id, cf->output.swizzle_z);
+		notice_gpr_read(output, id, cf->output.swizzle_z);
 	if (cf->output.swizzle_w < 4)
-		notice_gpr_read(&usage[cf->output.gpr], id, cf->output.swizzle_w);
+		notice_gpr_read(output, id, cf->output.swizzle_w);
 }
 
 static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t id)
@@ -1406,7 +1446,7 @@ static int is_intersection(struct gpr_usage_range* a, struct gpr_usage_range* b)
 static int rate_replacement(struct gpr_usage *usage, struct gpr_usage_range* range)
 {
 	unsigned i;
-	uint32_t best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF;
+	int32_t best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF;
 
 	for (i = 0; i < usage->nranges; ++i) {
 		if (usage->ranges[i].replacement != -1)
@@ -1492,7 +1532,7 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, stru
 	}
 }
 
-static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], uint32_t id)
+static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], int32_t id)
 {
 	struct gpr_usage_range *range;
 	unsigned src, num_src;
@@ -1522,7 +1562,7 @@ static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128
 	}
 }
 
-static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128], uint32_t id)
+static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128], int32_t id)
 {
 	struct gpr_usage_range *range;
 	range = find_src_range(&usage[tex->src_gpr], id);
@@ -1534,7 +1574,7 @@ static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128
 		tex->dst_gpr = range->replacement;
 }
 
-static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128], uint32_t id)
+static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128], int32_t id)
 {
 	struct gpr_usage_range *range;
 	range = find_src_range(&usage[vtx->src_gpr], id);
@@ -1546,13 +1586,15 @@ static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128
 		vtx->dst_gpr = range->replacement;
 }
 
-static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], uint32_t id)
+static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], int32_t id)
 {
 	//TODO handle other memory operations
 	struct gpr_usage_range *range;
 	range = find_src_range(&usage[cf->output.gpr], id);
 	if (range->replacement != -1)
 		cf->output.gpr = range->replacement;
+
+	cf->output.barrier = 1;
 }
 
 static void optimize_alu_inst(struct r600_bc_cf *cf, struct r600_bc_alu *alu)
@@ -1610,18 +1652,23 @@ static void r600_bc_optimize(struct r600_bc *bc)
 	struct r600_bc_vtx *vtx;
 	struct r600_bc_tex *tex;
 	struct gpr_usage usage[128];
-	uint32_t id;
+
+	/* assume that each gpr is exported only once */
+	struct r600_bc_cf *export_cf[128] = { NULL };
+	int32_t export_remap[128];
+
+	int32_t id, stack_start_id = -1;
 	unsigned i, j, stack = 0, predicate;
 
 	memset(&usage, 0, sizeof(usage));
-	for (i = 0; i < 128; ++i)
+	for (i = 0; i < 128; ++i) {
 		usage[i].first_write = -1;
+		usage[i].last_write = -1;
+	}
 
 	/* first gather some informations about the gpr usage */
 	id = 0;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
-	        id += 0x100;
-	        id &= ~0xFF;
 		switch (get_cf_class(cf)) {
 		case CF_CLASS_ALU:
 			predicate = 0;
@@ -1656,8 +1703,8 @@ static void r600_bc_optimize(struct r600_bc *bc)
 			}
 			break;
 		case CF_CLASS_EXPORT:
-			notice_export_gprs(cf, usage, id);
-			break;
+			notice_export_gprs(cf, usage, export_cf, export_remap, id);
+			continue; // don't increment id
 		case CF_CLASS_OTHER:
 			switch (cf->inst) {
 			case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
@@ -1674,6 +1721,20 @@ static void r600_bc_optimize(struct r600_bc *bc)
 				goto out;
 			}
 		}
+		/* ensue exports are placed outside of conditional blocks */
+		if (stack && stack_start_id == -1)
+			stack_start_id = id & ~0xFF;
+		else if (!stack && stack_start_id != -1) {
+			for (i = 0; i < 124; ++i) {
+				if ((usage[i].last_write & ~0xFF) >= stack_start_id) {
+					usage[i].last_write = id & ~0xFF;
+				}
+			}
+			stack_start_id = -1;
+		}
+
+		id += 0x100;
+	        id &= ~0xFF;
 	}
 	assert(stack == 0);
 
@@ -1699,8 +1760,6 @@ static void r600_bc_optimize(struct r600_bc *bc)
 	/* apply the changes */
 	id = 0;
 	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
-	        id += 0x100;
-	        id &= ~0xFF;
 		switch (get_cf_class(cf)) {
 		case CF_CLASS_ALU:
 			LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
@@ -1726,10 +1785,18 @@ static void r600_bc_optimize(struct r600_bc *bc)
 			break;
 		case CF_CLASS_EXPORT:
 			replace_export_gprs(cf, usage, id);
-			break;
+			continue; // don't increment id
 		case CF_CLASS_OTHER:
 			break;
 		}
+	        id += 0x100;
+	        id &= ~0xFF;
+		for (i = 0; i < 128; ++i) {
+			if (export_cf[i] && id == export_remap[i]) {
+				r600_bc_move_cf(bc, export_cf[i], cf);
+				replace_export_gprs(export_cf[i], usage, id);
+			}
+		}
 	}
 
 out:
@@ -1744,6 +1811,7 @@ int r600_bc_build(struct r600_bc *bc)
 	struct r600_bc_alu *alu;
 	struct r600_bc_vtx *vtx;
 	struct r600_bc_tex *tex;
+	struct r600_bc_cf *exports[4] = { NULL };
 	uint32_t literal[4];
 	unsigned nliteral;
 	unsigned addr;
@@ -1782,6 +1850,8 @@ int r600_bc_build(struct r600_bc *bc)
 			break;
 			break;
 		case CF_CLASS_EXPORT:
+			if (cf->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT))
+				exports[cf->output.type] = cf;
 			break;
 		case CF_CLASS_OTHER:
 			break;
@@ -1793,6 +1863,14 @@ int r600_bc_build(struct r600_bc *bc)
 		addr += cf->ndw;
 		bc->ndw = cf->addr + cf->ndw;
 	}
+
+	/* set export done on last export of each type */
+	for (i = 0; i < 4; ++i) {
+		if (exports[i]) {
+			exports[i]->inst = BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
+		}
+	}
+
 	free(bc->bytecode);
 	bc->bytecode = calloc(1, bc->ndw * 4);
 	if (bc->bytecode == NULL)
@@ -1967,9 +2045,7 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
 			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
 			fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
-			fprintf(stderr, "INST:%d ", cf->output.inst);
-			fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
-			break;
+			fprintf(stderr, "INST:%d\n", cf->inst);
 			break;
 		case CF_CLASS_OTHER:
 			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 259b264e4d9..112f6f0af7a 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -108,8 +108,6 @@ struct r600_bc_vtx {
 struct r600_bc_output {
 	unsigned			array_base;
 	unsigned			type;
-	unsigned			end_of_program;
-	unsigned			inst;
 	unsigned			elem_size;
 	unsigned			gpr;
 	unsigned			swizzle_x;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 2bfe1be5447..e3a72f89c66 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -487,7 +487,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 	struct tgsi_full_immediate *immediate;
 	struct r600_shader_ctx ctx;
 	struct r600_bc_output output[32];
-	unsigned output_done, noutput;
+	unsigned noutput;
 	unsigned opcode;
 	int i, r = 0, pos0;
 
@@ -606,7 +606,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 		output[i].barrier = i == 0;
 		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
 		output[i].array_base = i - pos0;
-		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
 		switch (ctx.type) {
 		case TGSI_PROCESSOR_VERTEX:
 			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
@@ -669,7 +668,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 			output[i].barrier = 1;
 			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
 			output[i].array_base = 0;
-			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
 			noutput++;
 		}
 	}
@@ -685,19 +683,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 		output[0].barrier = 1;
 		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
 		output[0].array_base = 0;
-		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
 		noutput++;
 	}
-	/* set export done on last export of each type */
-	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
-		if (i == (noutput - 1)) {
-			output[i].end_of_program = 1;
-		}
-		if (!(output_done & (1 << output[i].type))) {
-			output_done |= (1 << output[i].type);
-			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
-		}
-	}
 	/* add output to bytecode */
 	for (i = 0; i < noutput; i++) {
 		r = r600_bc_add_output(ctx.bc, &output[i]);
@@ -731,7 +718,7 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
 	memset(r600_src, 0, sizeof(struct r600_bc_alu_src));
 	r600_src->neg = tgsi_src->Register.Negate;
 	r600_src->abs = tgsi_src->Register.Absolute;
-	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {		
+	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
 		int index;
 		if((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
 			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
-- 
cgit v1.2.3


From 10dbabc48129ca64b0cb27b3a05d040e725b481c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 7 Jan 2011 18:26:51 +0100
Subject: r600g: fully implement barrier handling

---
 src/gallium/drivers/r600/eg_asm.c      |   8 +-
 src/gallium/drivers/r600/r600_asm.c    | 268 ++++++++++++++++++++++++---------
 src/gallium/drivers/r600/r600_asm.h    |   2 +-
 src/gallium/drivers/r600/r600_shader.c |   3 -
 4 files changed, 205 insertions(+), 76 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index ff1c2e5b9e1..4233afa8c53 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -46,14 +46,14 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
-					S_SQ_CF_ALU_WORD1_BARRIER(1) |
+					S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) |
 					S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
 		break;
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX:
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-			S_SQ_CF_WORD1_BARRIER(1) |
+			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
 			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) |
 			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
 		break;
@@ -67,7 +67,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end_of_program);
 		break;
@@ -82,7 +82,7 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-			S_SQ_CF_WORD1_BARRIER(1) |
+			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
 			S_SQ_CF_WORD1_COND(cf->cond) |
 			S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
 			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index f455080ce8f..e78c1298332 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -100,6 +100,7 @@ static struct r600_bc_cf *r600_bc_cf(void)
 	LIST_INITHEAD(&cf->alu);
 	LIST_INITHEAD(&cf->vtx);
 	LIST_INITHEAD(&cf->tex);
+	cf->barrier = 1;
 	return cf;
 }
 
@@ -198,20 +199,25 @@ static void r600_bc_remove_cf(struct r600_bc *bc, struct r600_bc_cf *cf)
 	free(cf);
 }
 
-static void r600_bc_move_cf(struct r600_bc *bc, struct r600_bc_cf *cf, struct r600_bc_cf *prev)
+static void r600_bc_move_cf(struct r600_bc *bc, struct r600_bc_cf *cf, struct r600_bc_cf *next)
 {
+	struct r600_bc_cf *prev = LIST_ENTRY(struct r600_bc_cf, next->list.prev, list);
+	unsigned old_id = cf->id;
 	unsigned new_id = prev->id + 2;
 	struct r600_bc_cf *other;
 
+	if (prev == cf)
+		return; /* position hasn't changed */
+
 	LIST_DEL(&cf->list);
 	LIST_FOR_EACH_ENTRY(other, &bc->cf, list) {
-		if (other->id > cf->id)
+		if (other->id > old_id)
 			other->id -= 2;
 		if (other->id >= new_id)
 			other->id += 2;
-		if (other->cf_addr > cf->id)
+		if (other->cf_addr > old_id)
 			other->cf_addr -= 2;
-		if (other->cf_addr >= new_id)
+		if (other->cf_addr > new_id)
 			other->cf_addr += 2;
 	}
 	cf->id = new_id;
@@ -1183,15 +1189,15 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
-					S_SQ_CF_ALU_WORD1_BARRIER(1) |
-					S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
-					S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
+			S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) |
+			S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
+			S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
 		break;
 	case CF_CLASS_TEXTURE:
 	case CF_CLASS_VERTEX:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-			S_SQ_CF_WORD1_BARRIER(1) |
+			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
 			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) |
 			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
 		break;
@@ -1204,14 +1210,14 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end_of_program);
 		break;
 	case CF_CLASS_OTHER:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-			S_SQ_CF_WORD1_BARRIER(1) |
+			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
 			S_SQ_CF_WORD1_COND(cf->cond) |
 			S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
 			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
@@ -1233,7 +1239,7 @@ struct gpr_usage_range {
 struct gpr_usage {
 	unsigned		channels:4;
 	int32_t			first_write;
-	int32_t			last_write;
+	int32_t			last_write[4];
 	unsigned	        nranges;
 	struct gpr_usage_range  *ranges;
 };
@@ -1268,12 +1274,16 @@ static void notice_gpr_rel_read(struct gpr_usage usage[128], int32_t id, unsigne
 		notice_gpr_read(&usage[i], id, chan);
 }
 
+static void notice_gpr_last_write(struct gpr_usage *usage, int32_t id, unsigned chan)
+{
+        usage->last_write[chan] = id; /* record id as the most recent write to channel chan of this GPR */
+}
+
 static void notice_gpr_write(struct gpr_usage *usage, int32_t id, unsigned chan,
 				int predicate, int prefered_replacement)
 {
 	int32_t start = usage->first_write != -1 ? usage->first_write : id;
 	usage->channels &= ~(1 << chan);
-	usage->last_write = id;
 	if (usage->channels) {
 		if (usage->first_write == -1)
 			usage->first_write = id;
@@ -1286,11 +1296,21 @@ static void notice_gpr_write(struct gpr_usage *usage, int32_t id, unsigned chan,
         } else if (usage->ranges[usage->nranges-1].start == start && prefered_replacement != -1) {
         	usage->ranges[usage->nranges-1].replacement = prefered_replacement;
         }
+        notice_gpr_last_write(usage, id, chan);
+}
+
+static void notice_gpr_rel_last_write(struct gpr_usage usage[128], int32_t id, unsigned chan)
+{
+	unsigned gpr = 128;	/* stamp id as last write of chan on all 128 entries; order is irrelevant */
+	while (gpr--)
+		notice_gpr_last_write(&usage[gpr], id, chan);
 }
 
 static void notice_gpr_rel_write(struct gpr_usage usage[128], int32_t id, unsigned chan)
 {
-	/* we can't know wich gpr is really used, so ignore it for now*/
+	unsigned i;
+	for (i = 0; i < 128; ++i)
+		notice_gpr_write(&usage[i], id, chan, 1, -1);
 }
 
 static void notice_alu_src_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], int32_t id)
@@ -1389,18 +1409,14 @@ static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128]
 }
 
 static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
-				struct r600_bc_cf *export_cf[128], int32_t export_remap[128],
-				int32_t id)
+				struct r600_bc_cf *export_cf[128], int32_t export_remap[128])
 {
 	//TODO handle other memory operations
 	struct gpr_usage *output = &usage[cf->output.gpr];
-	int32_t last_write = (output->last_write + 0x100) & ~0xFF;
+	int32_t id = (output->last_write[0] + 0x100) & ~0xFF;
 
-	if (last_write != id && !export_cf[cf->output.gpr]) {
-		export_cf[cf->output.gpr] = cf;
-		export_remap[cf->output.gpr] = last_write;
-		id = last_write;
-	}
+	export_cf[cf->output.gpr] = cf;
+	export_remap[cf->output.gpr] = id;
 	if (cf->output.swizzle_x < 4)
 		notice_gpr_read(output, id, cf->output.swizzle_x);
 	if (cf->output.swizzle_y < 4)
@@ -1438,6 +1454,14 @@ static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int32_t i
 	return NULL;
 }
 
+/* Nonzero when chan's recorded last write differs from (id & ~0xFF) and is not older than last_barrier. */
+static int is_barrier_needed(struct gpr_usage *usage, int32_t id, unsigned chan, int32_t last_barrier)
+{
+	int32_t written = usage->last_write[chan];
+
+	return written != (id & ~0xFF) && written >= last_barrier;
+}
+
 static int is_intersection(struct gpr_usage_range* a, struct gpr_usage_range* b)
 {
 	return a->start <= b->end && b->start < a->end;
@@ -1532,8 +1556,10 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, stru
 	}
 }
 
-static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], int32_t id)
+static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128],
+				int32_t id, int32_t last_barrier, unsigned *barrier)
 {
+	struct gpr_usage *cur_usage;
 	struct gpr_usage_range *range;
 	unsigned src, num_src;
 
@@ -1543,13 +1569,17 @@ static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128
 		if (!is_gpr(alu->src[src].sel))
 			continue;
 
-		range = find_src_range(&usage[alu->src[src].sel], id);
+		cur_usage = &usage[alu->src[src].sel];
+		range = find_src_range(cur_usage, id);
 		if (range->replacement != -1)
 			alu->src[src].sel = range->replacement;
+
+		*barrier |= is_barrier_needed(cur_usage, id, alu->src[src].chan, last_barrier);
 	}
 
 	if (alu->dst.write) {
-		range = find_dst_range(&usage[alu->dst.sel], id);
+		cur_usage = &usage[alu->dst.sel];
+		range = find_dst_range(cur_usage, id);
 		if (range->replacement == alu->dst.sel) {
 			if (!alu->is_op3)
 				alu->dst.write = 0;
@@ -1559,42 +1589,106 @@ static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128
 		} else if (range->replacement != -1) {
 			alu->dst.sel = range->replacement;
 		}
+		if (alu->dst.rel)
+			notice_gpr_rel_last_write(usage, id, alu->dst.chan);
+		else
+			notice_gpr_last_write(cur_usage, id, alu->dst.chan);
 	}
 }
 
-static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128], int32_t id)
+static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128],
+				int32_t id, int32_t last_barrier, unsigned *barrier)
 {
-	struct gpr_usage_range *range;
-	range = find_src_range(&usage[tex->src_gpr], id);
+	struct gpr_usage *cur_usage = &usage[tex->src_gpr];
+	struct gpr_usage_range *range = find_src_range(cur_usage, id);
+
+	if (tex->src_rel) {
+		*barrier = 1;
+        } else {
+		if (tex->src_sel_x < 4)
+			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_x, last_barrier);
+		if (tex->src_sel_y < 4)
+			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_y, last_barrier);
+		if (tex->src_sel_z < 4)
+			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_z, last_barrier);
+		if (tex->src_sel_w < 4)
+			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_w, last_barrier);
+	}
+
 	if (range->replacement != -1)
 		tex->src_gpr = range->replacement;
 
-	range = find_dst_range(&usage[tex->dst_gpr], id);
+	cur_usage = &usage[tex->dst_gpr];
+	range = find_dst_range(cur_usage, id);
 	if (range->replacement != -1)
 		tex->dst_gpr = range->replacement;
+
+	if (tex->dst_rel) {
+		if (tex->dst_sel_x != 7)
+			notice_gpr_rel_last_write(usage, id, tex->dst_sel_x);
+		if (tex->dst_sel_y != 7)
+			notice_gpr_rel_last_write(usage, id, tex->dst_sel_y);
+		if (tex->dst_sel_z != 7)
+			notice_gpr_rel_last_write(usage, id, tex->dst_sel_z);
+		if (tex->dst_sel_w != 7)
+			notice_gpr_rel_last_write(usage, id, tex->dst_sel_w);
+	} else {
+		if (tex->dst_sel_x != 7)
+			notice_gpr_last_write(cur_usage, id, tex->dst_sel_x);
+		if (tex->dst_sel_y != 7)
+			notice_gpr_last_write(cur_usage, id, tex->dst_sel_y);
+		if (tex->dst_sel_z != 7)
+			notice_gpr_last_write(cur_usage, id, tex->dst_sel_z);
+		if (tex->dst_sel_w != 7)
+			notice_gpr_last_write(cur_usage, id, tex->dst_sel_w);
+	}
 }
 
-static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128], int32_t id)
+static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
+				int32_t id, int32_t last_barrier, unsigned *barrier)
 {
-	struct gpr_usage_range *range;
-	range = find_src_range(&usage[vtx->src_gpr], id);
+	struct gpr_usage *cur_usage = &usage[vtx->src_gpr];
+	struct gpr_usage_range *range = find_src_range(cur_usage, id);
+
+	*barrier |= is_barrier_needed(cur_usage, id, vtx->src_sel_x, last_barrier);
+
 	if (range->replacement != -1)
 		vtx->src_gpr = range->replacement;
 
-	range = find_dst_range(&usage[vtx->dst_gpr], id);
+	cur_usage = &usage[vtx->dst_gpr];
+	range = find_dst_range(cur_usage, id);
 	if (range->replacement != -1)
 		vtx->dst_gpr = range->replacement;
+
+	if (vtx->dst_sel_x != 7)
+		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_x);
+	if (vtx->dst_sel_y != 7)
+		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_y);
+	if (vtx->dst_sel_z != 7)
+		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_z);
+	if (vtx->dst_sel_w != 7)
+		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_w);
 }
 
-static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128], int32_t id)
+static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
+				int32_t id, int32_t last_barrier)
 {
 	//TODO handle other memory operations
-	struct gpr_usage_range *range;
-	range = find_src_range(&usage[cf->output.gpr], id);
+	struct gpr_usage *cur_usage = &usage[cf->output.gpr];
+	struct gpr_usage_range *range = find_src_range(cur_usage, id);
+
+	cf->barrier = 0;
+	if (cf->output.swizzle_x < 4)
+		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_x, last_barrier);
+	if (cf->output.swizzle_y < 4)
+		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_y, last_barrier);
+	if (cf->output.swizzle_z < 4)
+		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_z, last_barrier);
+	if (cf->output.swizzle_w < 4)
+		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_w, last_barrier);
+
 	if (range->replacement != -1)
 		cf->output.gpr = range->replacement;
-
-	cf->output.barrier = 1;
 }
 
 static void optimize_alu_inst(struct r600_bc_cf *cf, struct r600_bc_alu *alu)
@@ -1657,17 +1751,20 @@ static void r600_bc_optimize(struct r600_bc *bc)
 	struct r600_bc_cf *export_cf[128] = { NULL };
 	int32_t export_remap[128];
 
-	int32_t id, stack_start_id = -1;
-	unsigned i, j, stack = 0, predicate;
+	int32_t id, barrier[bc->nstack];
+	unsigned i, j, stack, predicate, old_stack;
 
 	memset(&usage, 0, sizeof(usage));
 	for (i = 0; i < 128; ++i) {
 		usage[i].first_write = -1;
-		usage[i].last_write = -1;
+		usage[i].last_write[0] = -1;
+		usage[i].last_write[1] = -1;
+		usage[i].last_write[2] = -1;
+		usage[i].last_write[3] = -1;
 	}
 
 	/* first gather some informations about the gpr usage */
-	id = 0;
+	id = 0; stack = 0;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		switch (get_cf_class(cf)) {
 		case CF_CLASS_ALU:
@@ -1703,7 +1800,7 @@ static void r600_bc_optimize(struct r600_bc *bc)
 			}
 			break;
 		case CF_CLASS_EXPORT:
-			notice_export_gprs(cf, usage, export_cf, export_remap, id);
+			notice_export_gprs(cf, usage, export_cf, export_remap);
 			continue; // don't increment id
 		case CF_CLASS_OTHER:
 			switch (cf->inst) {
@@ -1721,18 +1818,6 @@ static void r600_bc_optimize(struct r600_bc *bc)
 				goto out;
 			}
 		}
-		/* ensue exports are placed outside of conditional blocks */
-		if (stack && stack_start_id == -1)
-			stack_start_id = id & ~0xFF;
-		else if (!stack && stack_start_id != -1) {
-			for (i = 0; i < 124; ++i) {
-				if ((usage[i].last_write & ~0xFF) >= stack_start_id) {
-					usage[i].last_write = id & ~0xFF;
-				}
-			}
-			stack_start_id = -1;
-		}
-
 		id += 0x100;
 	        id &= ~0xFF;
 	}
@@ -1758,46 +1843,90 @@ static void r600_bc_optimize(struct r600_bc *bc)
 	bc->ngpr++;
 
 	/* apply the changes */
-	id = 0;
+
+	for (i = 0; i < 128; ++i) {
+		usage[i].last_write[0] = -1;
+		usage[i].last_write[1] = -1;
+		usage[i].last_write[2] = -1;
+		usage[i].last_write[3] = -1;
+	}
+	barrier[0] = 0;
+	id = 0; stack = 0;
 	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
+		old_stack = stack;
 		switch (get_cf_class(cf)) {
 		case CF_CLASS_ALU:
+			predicate = 0;
+			first = NULL;
+			cf->barrier = 0;
 			LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
-				replace_alu_gprs(alu, usage, id);
+				replace_alu_gprs(alu, usage, id, barrier[stack], &cf->barrier);
 				if (alu->last)
 					++id;
 
-				optimize_alu_inst(cf, alu);
+				if (is_alu_pred_inst(alu))
+					predicate++;
+
+				if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)
+					optimize_alu_inst(cf, alu);
 			}
+			if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3)
+				stack += predicate;
+			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)
+				stack -= 1;
+			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)
+				stack -= 2;
 			if (LIST_IS_EMPTY(&cf->alu)) {
 				r600_bc_remove_cf(bc, cf);
+				cf = NULL;
 			}
 			break;
 		case CF_CLASS_TEXTURE:
+			cf->barrier = 0;
 			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
-				replace_tex_gprs(tex, usage, id++);
+				replace_tex_gprs(tex, usage, id++, barrier[stack], &cf->barrier);
 			}
 			break;
 		case CF_CLASS_VERTEX:
+			cf->barrier = 0;
 			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
-				replace_vtx_gprs(vtx, usage, id++);
+				replace_vtx_gprs(vtx, usage, id++, barrier[stack], &cf->barrier);
 			}
 			break;
 		case CF_CLASS_EXPORT:
-			replace_export_gprs(cf, usage, id);
 			continue; // don't increment id
 		case CF_CLASS_OTHER:
+			if (cf->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
+				cf->barrier = 0;
+				stack -= cf->pop_count;
+			}
 			break;
 		}
-	        id += 0x100;
-	        id &= ~0xFF;
+
+		id &= ~0xFF;
+		if (cf && cf->barrier)
+			barrier[old_stack] = id;
+
+		for (i = old_stack + 1; i <= stack; ++i)
+			barrier[i] = barrier[old_stack];
+
+		id += 0x100;
+		if (stack != 0) /* ensue exports are placed outside of conditional blocks */
+			continue;
+
 		for (i = 0; i < 128; ++i) {
-			if (export_cf[i] && id == export_remap[i]) {
-				r600_bc_move_cf(bc, export_cf[i], cf);
-				replace_export_gprs(export_cf[i], usage, id);
-			}
+			if (!export_cf[i] || id < export_remap[i])
+				continue;
+
+			r600_bc_move_cf(bc, export_cf[i], next_cf);
+			replace_export_gprs(export_cf[i], usage, export_remap[i], barrier[stack]);
+			if (export_cf[i]->barrier)
+				barrier[stack] = id - 1;
+			next_cf = LIST_ENTRY(struct r600_bc_cf, export_cf[i]->list.next, list);
+			export_cf[i] = NULL;
 		}
 	}
+	assert(stack == 0);
 
 out:
 	for (i = 0; i < 128; ++i) {
@@ -1827,7 +1956,7 @@ int r600_bc_build(struct r600_bc *bc)
 
 	/* first path compute addr of each CF block */
 	/* addr start after all the CF instructions */
-	addr = bc->cf_last->id + 2;
+	addr = LIST_ENTRY(struct r600_bc_cf, bc->cf.prev, list)->id + 2;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		switch (get_cf_class(cf)) {
 		case CF_CLASS_ALU:
@@ -2020,6 +2149,7 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache1_mode);
 			fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache0_addr);
 			fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache1_addr);
+			fprintf(stderr, "BARRIER:%d ", cf->barrier);
 			fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
 			break;
 		case CF_CLASS_TEXTURE:
@@ -2029,6 +2159,7 @@ void r600_bc_dump(struct r600_bc *bc)
 			id++;
 			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
 			fprintf(stderr, "INST:%d ", cf->inst);
+			fprintf(stderr, "BARRIER:%d ", cf->barrier);
 			fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
 			break;
 		case CF_CLASS_EXPORT:
@@ -2044,7 +2175,7 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
 			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
 			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
-			fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
+			fprintf(stderr, "BARRIER:%d ", cf->barrier);
 			fprintf(stderr, "INST:%d\n", cf->inst);
 			break;
 		case CF_CLASS_OTHER:
@@ -2054,6 +2185,7 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
 			fprintf(stderr, "INST:%d ", cf->inst);
 			fprintf(stderr, "COND:%X ", cf->cond);
+			fprintf(stderr, "BARRIER:%d ", cf->barrier);
 			fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
 			break;
 		}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 112f6f0af7a..6a1c85217b7 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -114,7 +114,6 @@ struct r600_bc_output {
 	unsigned			swizzle_y;
 	unsigned			swizzle_z;
 	unsigned			swizzle_w;
-	unsigned			barrier;
 };
 
 struct r600_bc_cf {
@@ -126,6 +125,7 @@ struct r600_bc_cf {
 	unsigned			cond;
 	unsigned			pop_count;
 	unsigned			cf_addr; /* control flow addr */
+	unsigned			barrier;
 	unsigned			kcache0_mode;
 	unsigned			kcache1_mode;
 	unsigned			kcache0_addr;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index e3a72f89c66..d78e249ae95 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -603,7 +603,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 		output[i].swizzle_y = 1;
 		output[i].swizzle_z = 2;
 		output[i].swizzle_w = 3;
-		output[i].barrier = i == 0;
 		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
 		output[i].array_base = i - pos0;
 		switch (ctx.type) {
@@ -665,7 +664,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 			output[i].swizzle_y = 1;
 			output[i].swizzle_z = 2;
 			output[i].swizzle_w = 3;
-			output[i].barrier = 1;
 			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
 			output[i].array_base = 0;
 			noutput++;
@@ -680,7 +678,6 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 		output[0].swizzle_y = 7;
 		output[0].swizzle_z = 7;
 		output[0].swizzle_w = 7;
-		output[0].barrier = 1;
 		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
 		output[0].array_base = 0;
 		noutput++;
-- 
cgit v1.2.3


From ef4def1d9a2a48c7e32ea3e6bf0294470dfbf4c8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 8 Jan 2011 11:11:48 +0100
Subject: r600g: join export instructions

---
 src/gallium/drivers/r600/eg_asm.c   |  3 +-
 src/gallium/drivers/r600/r600_asm.c | 74 +++++++++++++++++++++++++++++++++----
 src/gallium/drivers/r600/r600_asm.h |  1 +
 3 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 4233afa8c53..1bb4c6b2afb 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -63,7 +63,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
-		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index e78c1298332..bee1c941e5d 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -233,6 +233,7 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 		return r;
 	bc->cf_last->inst = BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
 	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output));
+	bc->cf_last->output.burst_count = 1;
 	return 0;
 }
 
@@ -1206,7 +1207,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
-		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
@@ -1436,7 +1438,6 @@ static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t i
 		if (range->start < id && id <= range->end)
 			return range;
 	}
-	assert(0); /* should not happen */
 	return NULL;
 }
 
@@ -1488,14 +1489,15 @@ static int rate_replacement(struct gpr_usage *usage, struct gpr_usage_range* ran
 	return best_start + best_end;
 }
 
-static void find_replacement(struct gpr_usage usage[128], unsigned current, struct gpr_usage_range *range)
+static void find_replacement(struct gpr_usage usage[128], unsigned current,
+				struct gpr_usage_range *range, int is_export)
 {
 	unsigned i;
 	int best_gpr = -1, best_rate = 0x7FFFFFFF;
 
 	if (range->replacement != -1 && range->replacement <= current) {
 		struct gpr_usage_range *other = find_src_range(&usage[range->replacement], range->start);
-		if (other->replacement != -1)
+		if (other && other->replacement != -1)
 			range->replacement = other->replacement;
 	}
 
@@ -1523,7 +1525,7 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, stru
 				best_gpr = i;
 
 				/* can't get better than this */
-				if (rate == 0)
+				if (rate == 0 || is_export)
 					break;
 			}
 		}
@@ -1556,6 +1558,22 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current, stru
 	}
 }
 
+/* Pin next's source range to range->replacement + 1 if next exports the following array_base of the same type. */
+static void find_export_replacement(struct gpr_usage usage[128],
+				struct gpr_usage_range *range, struct r600_bc_cf *current,
+				struct r600_bc_cf *next, int32_t next_id)
+{
+	struct gpr_usage_range *next_range;
+
+	if (!next || next_id <= range->start || next_id > range->end ||
+	    current->output.type != next->output.type ||
+	    (current->output.array_base + 1) != next->output.array_base)
+		return;
+	next_range = find_src_range(&usage[next->output.gpr], next_id);
+	if (next_range) /* find_src_range() returns NULL when no range covers next_id */
+		next_range->replacement = range->replacement + 1;
+}
+
 static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128],
 				int32_t id, int32_t last_barrier, unsigned *barrier)
 {
@@ -1738,6 +1756,36 @@ static void optimize_alu_inst(struct r600_bc_cf *cf, struct r600_bc_alu *alu)
 	}
 }
 
+static void optimize_export_inst(struct r600_bc *bc, struct r600_bc_cf *cf)
+{ /* Try to fold cf and the CF entry preceding it in the list into one burst export. */
+	struct r600_bc_cf *prev = LIST_ENTRY(struct r600_bc_cf, cf->list.prev, list);
+	if (&prev->list == &bc->cf || /* presumably: prev is the list head, so cf has no predecessor */
+		prev->inst != cf->inst ||
+		prev->output.type != cf->output.type ||
+		prev->output.elem_size != cf->output.elem_size ||
+		prev->output.swizzle_x != cf->output.swizzle_x ||
+		prev->output.swizzle_y != cf->output.swizzle_y ||
+		prev->output.swizzle_z != cf->output.swizzle_z ||
+		prev->output.swizzle_w != cf->output.swizzle_w)
+		return;
+	/* the merged burst_count must not exceed 16 */
+	if ((prev->output.burst_count + cf->output.burst_count) > 16)
+		return;
+	/* merge only when both the GPRs and the array_base slots are contiguous */
+	if ((prev->output.gpr + prev->output.burst_count) == cf->output.gpr &&
+		(prev->output.array_base + prev->output.burst_count) == cf->output.array_base) {
+		/* cf continues right after prev: extend prev and drop cf */
+		prev->output.burst_count += cf->output.burst_count;
+		r600_bc_remove_cf(bc, cf);
+
+	} else if (prev->output.gpr == (cf->output.gpr + cf->output.burst_count) &&
+		prev->output.array_base == (cf->output.array_base + cf->output.burst_count)) {
+		/* prev continues right after cf: extend cf and drop prev */
+		cf->output.burst_count += prev->output.burst_count;
+		r600_bc_remove_cf(bc, prev);
+	}
+}
+
 static void r600_bc_optimize(struct r600_bc *bc)
 {
 	struct r600_bc_cf *cf, *next_cf;
@@ -1827,23 +1875,31 @@ static void r600_bc_optimize(struct r600_bc *bc)
 	for (i = 0; i < 124; ++i) {
 		for (j = 0; j < usage[i].nranges; ++j) {
 			struct gpr_usage_range *range = &usage[i].ranges[j];
+			int is_export = export_cf[i] && export_cf[i + 1] &&
+				range->start < export_remap[i] &&
+				export_remap[i] <= range->end;
+
 			if (range->start == -1)
 				range->replacement = -1;
 			else if (range->end == -1)
 				range->replacement = i;
 			else
-				find_replacement(usage, i, range);
+				find_replacement(usage, i, range, is_export);
 
 			if (range->replacement == -1)
 				bc->ngpr = i;
 			else if (range->replacement < i && range->replacement > bc->ngpr)
 				bc->ngpr = range->replacement;
+
+			if (is_export && range->replacement != -1) {
+				find_export_replacement(usage, range, export_cf[i],
+							export_cf[i + 1], export_remap[i + 1]);
+			}
 		}
 	}
 	bc->ngpr++;
 
 	/* apply the changes */
-
 	for (i = 0; i < 128; ++i) {
 		usage[i].last_write[0] = -1;
 		usage[i].last_write[1] = -1;
@@ -1923,6 +1979,7 @@ static void r600_bc_optimize(struct r600_bc *bc)
 			if (export_cf[i]->barrier)
 				barrier[stack] = id - 1;
 			next_cf = LIST_ENTRY(struct r600_bc_cf, export_cf[i]->list.next, list);
+			optimize_export_inst(bc, export_cf[i]);
 			export_cf[i] = NULL;
 		}
 	}
@@ -2176,7 +2233,8 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
 			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
 			fprintf(stderr, "BARRIER:%d ", cf->barrier);
-			fprintf(stderr, "INST:%d\n", cf->inst);
+			fprintf(stderr, "INST:%d ", cf->inst);
+			fprintf(stderr, "BURST_COUNT:%d\n", cf->output.burst_count);
 			break;
 		case CF_CLASS_OTHER:
 			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 6a1c85217b7..6059e45737f 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -114,6 +114,7 @@ struct r600_bc_output {
 	unsigned			swizzle_y;
 	unsigned			swizzle_z;
 	unsigned			swizzle_w;
+	unsigned			burst_count;
 };
 
 struct r600_bc_cf {
-- 
cgit v1.2.3


From 9bf8adc45e1b90b33e3900eef3a682a263b9e1bb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 9 Jan 2011 13:18:30 +0100
Subject: r600g: some merge fixes

---
 src/gallium/drivers/r600/r600_asm.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index b15758adc33..31f386964a1 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1020,7 +1020,15 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 			r600_bc_special_constants(
 				nalu->src[i].value[nalu->src[i].chan],
 				&nalu->src[i].sel, &nalu->src[i].neg);
+
+		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
+			bc->ngpr = nalu->src[i].sel + 1;
+		}
 	}
+	if (nalu->dst.sel >= bc->ngpr) {
+		bc->ngpr = nalu->dst.sel + 1;
+	}
+
 	LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
 	/* each alu use 2 dwords */
 	bc->cf_last->ndw += 2;
@@ -2331,15 +2339,15 @@ void r600_bc_dump(struct r600_bc *bc)
 		case CF_CLASS_ALU:
 			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
 			fprintf(stderr, "ADDR:%04d ", cf->addr);
-			fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache0_mode);
-			fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache0_bank);
-			fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache1_bank);
+			fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode);
+			fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank);
+			fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank);
 			id++;
 			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
 			fprintf(stderr, "INST:%d ", cf->inst);
-			fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache1_mode);
-			fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache0_addr);
-			fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache1_addr);
+			fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode);
+			fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr);
+			fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr);
 			fprintf(stderr, "BARRIER:%d ", cf->barrier);
 			fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
 			break;
-- 
cgit v1.2.3


From 3789a480edb271bc9e512c7e901dbfc96aafe955 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 9 Jan 2011 13:18:48 +0100
Subject: r600g: check if hardware blits are possible before enabling tiling

---
 src/gallium/drivers/r600/r600_texture.c | 99 +++++++++++++++++----------------
 1 file changed, 50 insertions(+), 49 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 6add92e6d4c..1f4f453c091 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -321,6 +321,48 @@ r600_texture_create_object(struct pipe_screen *screen,
 	return rtex;
 }
 
+/* Figure out whether u_blitter will fall back to a transfer operation.
+ * If so, don't use a staging resource (and don't force a tiled layout).
+ */
+static boolean permit_hardware_blit(struct pipe_screen *screen,
+					const struct pipe_resource *res)
+{
+	unsigned bind;
+
+	if (util_format_is_depth_or_stencil(res->format))
+		bind = PIPE_BIND_DEPTH_STENCIL;
+	else
+		bind = PIPE_BIND_RENDER_TARGET;
+
+	/* See r600_resource_copy_region: there is something wrong
+	 * with depth resource copies at the moment so avoid them for
+	 * now.
+	 */
+	if (util_format_get_component_bits(res->format,
+				UTIL_FORMAT_COLORSPACE_ZS,
+				0) != 0)
+		return FALSE;
+	/* the format must be supported with the binding chosen above ... */
+	if (!screen->is_format_supported(screen,
+				res->format,
+				res->target,
+				res->nr_samples,
+				bind, 0))
+		return FALSE;
+	/* ... and as a sampler view */
+	if (!screen->is_format_supported(screen,
+				res->format,
+				res->target,
+				res->nr_samples,
+				PIPE_BIND_SAMPLER_VIEW, 0))
+		return FALSE;
+	/* resources with stream usage never take the hardware blit path */
+	if (res->usage == PIPE_USAGE_STREAM)
+		return FALSE;
+
+	return TRUE;
+}
+
 struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
 						const struct pipe_resource *templ)
 {
@@ -332,7 +374,7 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
 	if (force_tiling == -1)
 		force_tiling = debug_get_bool_option("R600_FORCE_TILING", FALSE);
 
-	if (force_tiling) {
+	if (force_tiling && permit_hardware_blit(screen, templ)) {
 		if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
 		    !(templ->bind & PIPE_BIND_SCANOUT)) {
 			array_mode = V_038000_ARRAY_2D_TILED_THIN1;
@@ -485,46 +527,6 @@ static INLINE unsigned u_box_volume( const struct pipe_box *box )
 	return box->width * box->depth * box->height;
 };
 
-
-/* Figure out whether u_blitter will fallback to a transfer operation.
- * If so, don't use a staging resource.
- */
-static boolean permit_hardware_blit(struct pipe_screen *screen,
-					struct pipe_resource *res)
-{
-	unsigned bind;
-
-	if (util_format_is_depth_or_stencil(res->format))
-		bind = PIPE_BIND_DEPTH_STENCIL;
-	else
-		bind = PIPE_BIND_RENDER_TARGET;
-
-	/* See r600_resource_copy_region: there is something wrong
-	 * with depth resource copies at the moment so avoid them for
-	 * now.
-	 */
-	if (util_format_get_component_bits(res->format,
-				UTIL_FORMAT_COLORSPACE_ZS,
-				0) != 0)
-		return FALSE;
-
-	if (!screen->is_format_supported(screen,
-				res->format,
-				res->target,
-				res->nr_samples,
-				bind, 0))
-		return FALSE;
-
-	if (!screen->is_format_supported(screen,
-				res->format,
-				res->target,
-				res->nr_samples,
-				PIPE_BIND_SAMPLER_VIEW, 0))
-		return FALSE;
-
-	return TRUE;
-}
-
 struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
 						struct pipe_resource *texture,
 						unsigned level,
@@ -562,8 +564,7 @@ struct pipe_transfer* r600_texture_get_transfer(struct pipe_context *ctx,
 		use_staging_texture = TRUE;
 
 	if (!permit_hardware_blit(ctx->screen, texture) ||
-		(texture->flags & R600_RESOURCE_FLAG_TRANSFER) ||
-		(texture->usage == PIPE_USAGE_STREAM))
+		(texture->flags & R600_RESOURCE_FLAG_TRANSFER))
 		use_staging_texture = FALSE;
 
 	trans = CALLOC_STRUCT(r600_transfer);
@@ -795,7 +796,7 @@ static unsigned r600_get_swizzle_combined(const unsigned char *swizzle_format,
 
 /* texture format translate */
 uint32_t r600_translate_texformat(enum pipe_format format,
-				  const unsigned char *swizzle_view, 
+				  const unsigned char *swizzle_view,
 				  uint32_t *word4_p, uint32_t *yuv_format_p)
 {
 	uint32_t result = 0, word4 = 0, yuv_format = 0;
@@ -849,7 +850,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
 			break;
 		}
 		goto out_unknown; /* TODO */
-		
+
 	case UTIL_FORMAT_COLORSPACE_SRGB:
 		word4 |= S_038010_FORCE_DEGAMMA(1);
 		if (format == PIPE_FORMAT_L8A8_SRGB || format == PIPE_FORMAT_L8_SRGB)
@@ -865,7 +866,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
 		static int r600_enable_s3tc = -1;
 
 		if (r600_enable_s3tc == -1)
-			r600_enable_s3tc = 
+			r600_enable_s3tc =
 				debug_get_bool_option("R600_ENABLE_S3TC", FALSE);
 
 		if (!r600_enable_s3tc)
@@ -888,7 +889,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
 	}
 
 
-	for (i = 0; i < desc->nr_channels; i++) {	
+	for (i = 0; i < desc->nr_channels; i++) {
 		if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
 			word4 |= sign_bit[i];
 		}
@@ -902,7 +903,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
 	for (i = 1; i < desc->nr_channels; i++) {
 		uniform = uniform && desc->channel[0].size == desc->channel[i].size;
 	}
-	
+
 	/* Non-uniform formats. */
 	if (!uniform) {
 		switch(desc->nr_channels) {
@@ -1020,7 +1021,7 @@ uint32_t r600_translate_texformat(enum pipe_format format,
 				goto out_word4;
 			}
 		}
-		
+
 	}
 out_word4:
 	if (word4_p)
-- 
cgit v1.2.3


From 4025958e1bfcd7fd75d476f8f5687b983e730b71 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 9 Jan 2011 14:19:14 +0100
Subject: [g3dvl] use a table of empty block mask instead of calculating it

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 46 +++++++++++-------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  1 +
 2 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index b195d7e2c39..0933049c1c4 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -94,6 +94,12 @@ enum VS_OUTPUT
    VS_O_MV3
 };
 
+static const unsigned const_empty_block_mask_420[3][2][2] = { /* cbp bit to test, indexed [component][y][x] */
+        { { 0x20, 0x10 },  { 0x08, 0x04 } }, /* component 0 (luma): one cbp bit for each of the four blocks */
+        { { 0x02, 0x02 },  { 0x02, 0x02 } }, /* component 1 (chroma): the same cbp bit for every position */
+        { { 0x01, 0x01 },  { 0x01, 0x01 } } /* component 2 (chroma): the same cbp bit for every position */
+};
+
 static void *
 create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
@@ -125,14 +131,14 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    bkwd_pred = ureg_DECL_vs_input(shader, VS_I_BKWD_PRED);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
-   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);   
+   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
    o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
    o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
-   o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);   
+   o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
    o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0);
    o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1);
    o_info = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO);
-   
+
    for (i = 0; i < 4; ++i) {
      vmv[i] = ureg_DECL_vs_input(shader, VS_I_MV0 + i);
      o_vmv[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i);
@@ -217,7 +223,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
 
    ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_scalar(vrect, TGSI_SWIZZLE_Y));
-   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), 
+   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
       vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
 
    ureg_IF(shader, ureg_scalar(interlaced, TGSI_SWIZZLE_X), &label);
@@ -712,20 +718,6 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4])
    }
 }
 
-static bool
-empty_block(enum pipe_video_chroma_format chroma_format,
-            unsigned cbp, unsigned component,
-            unsigned x, unsigned y)
-{
-   /* TODO: Implement 422, 444 */
-   assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-
-   if(component == 0) /*luma*/
-      return !(cbp  & (1 << (5 - (x + y * 2))));
-   else /*cr cb*/
-      return !(cbp & (1 << (2 - component)));
-}
-
 static void
 grab_vectors(struct vl_mpeg12_mc_renderer *r,
              struct vl_mpeg12_mc_buffer *buffer,
@@ -742,9 +734,9 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
    stream.pos.y = mb->mby;
    for ( i = 0; i < 2; ++i) {
       for ( j = 0; j < 2; ++j) {
-         stream.eb[i][j].y = empty_block(r->chroma_format, mb->cbp, 0, j, i);
-         stream.eb[i][j].cr = empty_block(r->chroma_format, mb->cbp, 1, j, i);
-         stream.eb[i][j].cb = empty_block(r->chroma_format, mb->cbp, 2, j, i);         
+         stream.eb[i][j].y = !(mb->cbp & (*r->empty_block_mask)[0][i][j]);
+         stream.eb[i][j].cr = !(mb->cbp & (*r->empty_block_mask)[1][i][j]);
+         stream.eb[i][j].cb = !(mb->cbp & (*r->empty_block_mask)[2][i][j]);
       }
    }
    stream.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
@@ -759,7 +751,7 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
       case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
          stream.ref_frames = 1.0f;
          break;
-        
+
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
          stream.ref_frames = 0.0f;
          break;
@@ -786,7 +778,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r,
 
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
-         if (!empty_block(r->chroma_format, cbp, 0, x, y)) {
+         if (cbp & (*r->empty_block_mask)[0][y][x]) {
             vl_idct_add_block(&buffer->idct_y, mbx * 2 + x, mby * 2 + y, blocks);
             blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
          }
@@ -797,7 +789,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r,
    assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
    for (tb = 1; tb < 3; ++tb) {
-      if (!empty_block(r->chroma_format, cbp, tb, 0, 0)) {
+      if (cbp & (*r->empty_block_mask)[tb][0][0]) {
          if(tb == 1)
             vl_idct_add_block(&buffer->idct_cb, mbx, mby, blocks);
          else
@@ -860,6 +852,10 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->chroma_format = chroma_format;
    renderer->bufmode = bufmode;
 
+   /* TODO: Implement 422, 444 */
+   assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+   renderer->empty_block_mask = &const_empty_block_mask_420;
+
    renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
                                            texview_map_delete);
    if (!renderer->texview_map)
@@ -959,7 +955,7 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer);
 
    buffer->vertex_bufs.individual.stream = vl_vb_init(
-      &buffer->vertex_stream, renderer->pipe, renderer->macroblocks_per_batch, 
+      &buffer->vertex_stream, renderer->pipe, renderer->macroblocks_per_batch,
       sizeof(struct vertex_stream) / sizeof(float),
       renderer->vertex_stream_stride);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 79e872c2bee..76d6e25ca36 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -52,6 +52,7 @@ struct vl_mpeg12_mc_renderer
    unsigned buffer_width;
    unsigned buffer_height;
    enum pipe_video_chroma_format chroma_format;
+   const unsigned (*empty_block_mask)[3][2][2];
    enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode;
    unsigned macroblocks_per_batch;
 
-- 
cgit v1.2.3


From c8236aaf7056bd8645804e71596d2d6460e62d15 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 10 Jan 2011 19:15:42 +0100
Subject: [g3dvl] move to integer vertices

---
 src/gallium/auxiliary/vl/vl_idct.c               | 22 +++---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 97 +++++++++---------------
 src/gallium/auxiliary/vl/vl_types.h              |  5 ++
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     |  8 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     | 18 ++---
 5 files changed, 66 insertions(+), 84 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index b84b447ce6b..5d472f93481 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -175,11 +175,11 @@ increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
    unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
    unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
 
-   /* 
-    * daddr[0..1].(start) = saddr[0..1].(start) 
-    * daddr[0..1].(tc) = saddr[0..1].(tc) 
+   /*
+    * daddr[0..1].(start) = saddr[0..1].(start)
+    * daddr[0..1].(tc) = saddr[0..1].(tc)
     */
-   
+
    ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
    ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
    ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
@@ -256,7 +256,7 @@ create_matrix_frag_shader(struct vl_idct *idct)
       s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]);
       fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1));
    }
-   
+
    for (i = 0; i < NR_RENDER_TARGETS; ++i) {
       if(i > 0)
          increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT);
@@ -334,7 +334,7 @@ init_shaders(struct vl_idct *idct)
    idct->transpose_vs = create_vert_shader(idct, false);
    idct->transpose_fs = create_transpose_frag_shader(idct);
 
-   return 
+   return
       idct->matrix_vs != NULL &&
       idct->matrix_fs != NULL &&
       idct->transpose_vs != NULL &&
@@ -396,7 +396,7 @@ init_state(struct vl_idct *idct)
    vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
 
    /* Pos element */
-   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
    idct->vertex_buffer_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1);
    idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
@@ -483,7 +483,7 @@ init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, idct->quad.buffer);
 
    buffer->vertex_bufs.individual.pos = vl_vb_init(
-      &buffer->blocks, idct->pipe, idct->max_blocks, 2,
+      &buffer->blocks, idct->pipe, idct->max_blocks,
       idct->vertex_buffer_stride);
 
    if(buffer->vertex_bufs.individual.pos.buffer == NULL)
@@ -553,7 +553,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    return matrix;
 }
 
-bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, 
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
                   struct pipe_resource *matrix)
 {
@@ -701,7 +701,7 @@ vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 void
 vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block)
 {
-   struct vertex2f v;
+   struct vertex2s v;
    unsigned tex_pitch;
    short *texels;
 
@@ -717,7 +717,7 @@ vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *
 
    v.x = x;
    v.y = y;
-   vl_vb_add_block(&buffer->blocks, (float*)&v);
+   vl_vb_add_block(&buffer->blocks, &v);
 }
 
 void
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 0933049c1c4..922bff73717 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -45,35 +45,28 @@
 
 struct vertex_stream
 {
-   struct vertex2f pos;
+   struct vertex2s pos;
+   struct vertex2s mv[4];
    struct {
-      float y;
-      float cr;
-      float cb;
+      int8_t y;
+      int8_t cr;
+      int8_t cb;
+      int8_t flag; /* per macroblock: [0][0] field DCT, [0][1] frame motion type, [1][0] backward pred, [1][1] -1 intra / 1 fwd or bkwd / 0 bi */
    } eb[2][2];
-   float interlaced;
-   float frame_pred;
-   float ref_frames;
-   float bkwd_pred;
-   struct vertex2f mv[4];
 };
 
 enum VS_INPUT
 {
    VS_I_RECT,
    VS_I_VPOS,
-   VS_I_EB_0_0,
-   VS_I_EB_0_1,
-   VS_I_EB_1_0,
-   VS_I_EB_1_1,
-   VS_I_INTERLACED,
-   VS_I_FRAME_PRED,
-   VS_I_REF_FRAMES,
-   VS_I_BKWD_PRED,
    VS_I_MV0,
    VS_I_MV1,
    VS_I_MV2,
    VS_I_MV3,
+   VS_I_EB_0_0,
+   VS_I_EB_0_1,
+   VS_I_EB_1_0,
+   VS_I_EB_1_1,
 
    NUM_VS_INPUTS
 };
@@ -106,7 +99,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    struct ureg_program *shader;
    struct ureg_src block_scale, mv_scale;
    struct ureg_src vrect, vpos, eb[2][2], vmv[4];
-   struct ureg_src interlaced, frame_pred, ref_frames, bkwd_pred;
    struct ureg_dst t_vpos, t_vtex, t_vmv;
    struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4], o_info;
    unsigned i, label;
@@ -125,10 +117,6 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    eb[1][0] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
    eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
    eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
-   interlaced = ureg_DECL_vs_input(shader, VS_I_INTERLACED);
-   frame_pred = ureg_DECL_vs_input(shader, VS_I_FRAME_PRED);
-   ref_frames = ureg_DECL_vs_input(shader, VS_I_REF_FRAMES);
-   bkwd_pred = ureg_DECL_vs_input(shader, VS_I_BKWD_PRED);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
    o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
@@ -165,7 +153,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
     * o_line.xy = vrect * 8
     * o_line.z = interlaced
     *
-    * if(interlaced) {
+    * if(eb[0][0].w) { //interlaced
     *    t_vtex.x = vrect.x
     *    t_vtex.y = vrect.y * 0.5
     *    t_vtex += vpos
@@ -200,21 +188,23 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
             ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
             eb[1][1], eb[1][0]);
 
-   ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_X), ref_frames);
+   ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_X),
+            ureg_scalar(eb[1][1], TGSI_SWIZZLE_W));
    ureg_SGE(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Y),
-      ureg_scalar(ref_frames, TGSI_SWIZZLE_X), ureg_imm1f(shader, 0.0f));
-   ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Z), ureg_scalar(bkwd_pred, TGSI_SWIZZLE_X));
+      ureg_scalar(eb[1][1], TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.0f));
+   ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Z),
+            ureg_scalar(eb[1][0], TGSI_SWIZZLE_W));
 
    ureg_MAD(shader, ureg_writemask(o_vmv[0], TGSI_WRITEMASK_XY), mv_scale, vmv[0], ureg_src(t_vpos));
    ureg_MAD(shader, ureg_writemask(o_vmv[2], TGSI_WRITEMASK_XY), mv_scale, vmv[2], ureg_src(t_vpos));
 
    ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
+            ureg_negate(ureg_scalar(eb[0][1], TGSI_SWIZZLE_W)),
             vmv[0], vmv[1]);
    ureg_MAD(shader, ureg_writemask(o_vmv[1], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos));
 
    ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(frame_pred, TGSI_SWIZZLE_X)),
+            ureg_negate(ureg_scalar(eb[0][1], TGSI_SWIZZLE_W)),
             vmv[2], vmv[3]);
    ureg_MAD(shader, ureg_writemask(o_vmv[3], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos));
 
@@ -226,7 +216,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
       vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
 
-   ureg_IF(shader, ureg_scalar(interlaced, TGSI_SWIZZLE_X), &label);
+   ureg_IF(shader, ureg_scalar(eb[0][0], TGSI_SWIZZLE_W), &label);
 
       ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
       ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
@@ -563,37 +553,25 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    r->quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
 
    /* Position element */
-   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
+
+   for (i = 0; i < 4; ++i)
+      /* motion vector 0..4 element */
+      vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
    /* y, cr, cb empty block element top left block */
-   vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
 
    /* y, cr, cb empty block element top right block */
-   vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
 
    /* y, cr, cb empty block element bottom left block */
-   vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
+   vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
 
    /* y, cr, cb empty block element bottom right block */
-   vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R32G32B32_FLOAT;
-
-   /* progressive=0.0f interlaced=1.0f */
-   vertex_elems[VS_I_INTERLACED].src_format = PIPE_FORMAT_R32_FLOAT;
-
-   /* frame=0.0f field=1.0f */
-   vertex_elems[VS_I_FRAME_PRED].src_format = PIPE_FORMAT_R32_FLOAT;
-
-   /* intra=-1.0f forward/backward=1.0f bi=0.0f */
-   vertex_elems[VS_I_REF_FRAMES].src_format = PIPE_FORMAT_R32_FLOAT;
-
-   /* forward=0.0f backward=1.0f */
-   vertex_elems[VS_I_BKWD_PRED].src_format = PIPE_FORMAT_R32_FLOAT;
-
-   for (i = 0; i < 4; ++i)
-      /* motion vector 0..4 element */
-      vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R32G32_FLOAT;
+   vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
 
-   r->vertex_stream_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 13, 1);
+   r->vertex_stream_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 9, 1);
 
    r->vertex_elems_state = r->pipe->create_vertex_elements_state(
       r->pipe, NUM_VS_INPUTS, vertex_elems);
@@ -652,7 +630,7 @@ static struct pipe_sampler_view
 }
 
 static void
-get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2f mv[4])
+get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
 {
    switch (mb->mb_type) {
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
@@ -739,21 +717,21 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
          stream.eb[i][j].cb = !(mb->cbp & (*r->empty_block_mask)[2][i][j]);
       }
    }
-   stream.interlaced = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD ? 1.0f : 0.0f;
-   stream.frame_pred = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME ? 1.0f : 0.0f;
-   stream.bkwd_pred = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD ? 1.0f : 0.0f;
+   stream.eb[0][0].flag = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
+   stream.eb[0][1].flag = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
+   stream.eb[1][0].flag = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD;
    switch (mb->mb_type) {
       case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
-         stream.ref_frames = -1.0f;
+         stream.eb[1][1].flag = -1;
          break;
 
       case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
       case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-         stream.ref_frames = 1.0f;
+         stream.eb[1][1].flag = 1;
          break;
 
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-         stream.ref_frames = 0.0f;
+         stream.eb[1][1].flag = 0;
          break;
 
       default:
@@ -761,7 +739,7 @@ grab_vectors(struct vl_mpeg12_mc_renderer *r,
    }
 
    get_motion_vectors(mb, stream.mv);
-   vl_vb_add_block(&buffer->vertex_stream, (float*)&stream);
+   vl_vb_add_block(&buffer->vertex_stream, &stream);
 }
 
 static void
@@ -956,7 +934,6 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
 
    buffer->vertex_bufs.individual.stream = vl_vb_init(
       &buffer->vertex_stream, renderer->pipe, renderer->macroblocks_per_batch,
-      sizeof(struct vertex_stream) / sizeof(float),
       renderer->vertex_stream_stride);
 
    return true;
diff --git a/src/gallium/auxiliary/vl/vl_types.h b/src/gallium/auxiliary/vl/vl_types.h
index eeabd43cb23..9c745d73978 100644
--- a/src/gallium/auxiliary/vl/vl_types.h
+++ b/src/gallium/auxiliary/vl/vl_types.h
@@ -33,6 +33,11 @@ struct vertex2f
    float x, y;
 };
 
+struct vertex2s
+{
+   short x, y; /* NOTE(review): fed to PIPE_FORMAT_R16G16_SSCALED vertex elements, so short is assumed to be 16 bits */
+};
+
 struct vertex4f
 {
    float x, y, z, w;
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 3a69730c9da..8599ed3533d 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -83,7 +83,7 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
 }
 
 struct pipe_vertex_element
-vl_vb_get_quad_vertex_element()
+vl_vb_get_quad_vertex_element(void)
 {
    struct pipe_vertex_element element;
 
@@ -100,7 +100,7 @@ unsigned
 vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
                               unsigned vertex_buffer_index)
 {
-   unsigned i, offset = 0;
+   unsigned i, size, offset = 0;
 
    assert(elements && num_elements);
 
@@ -116,14 +116,14 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
 
 struct pipe_vertex_buffer
 vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
-           unsigned max_blocks, unsigned num_elements, unsigned stride)
+           unsigned max_blocks, unsigned stride)
 {
    struct pipe_vertex_buffer buf;
 
    assert(buffer);
 
    buffer->num_verts = 0;
-   buffer->num_elements = num_elements;
+   buffer->stride = stride;
 
    buf.stride = stride;
    buf.max_index = 4 * max_blocks - 1;
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index e312515dc30..b8e8766ec50 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -34,37 +34,37 @@
 struct vl_vertex_buffer
 {
    unsigned num_verts;
-   unsigned num_elements;
+   unsigned stride;
    struct pipe_resource *resource;
    struct pipe_transfer *transfer;
-   float *vectors;
+   void *vectors;
 };
 
 struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks);
 
-struct pipe_vertex_element vl_vb_get_quad_vertex_element();
+struct pipe_vertex_element vl_vb_get_quad_vertex_element(void);
 
 unsigned vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
                               unsigned vertex_buffer_index);
 
 struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer,
                                      struct pipe_context *pipe,
-                                     unsigned max_blocks, unsigned num_elements,
-                                     unsigned stride);
+                                     unsigned max_blocks, unsigned stride);
 
 void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
 static inline void
-vl_vb_add_block(struct vl_vertex_buffer *buffer, float *elements)
+vl_vb_add_block(struct vl_vertex_buffer *buffer, void *elements)
 {
-   float *pos;
+   void *pos;
    unsigned i;
 
    assert(buffer);
 
+   pos = buffer->vectors + buffer->num_verts * buffer->stride;
    for(i = 0; i < 4; ++i) {
-      pos = buffer->vectors + buffer->num_verts * buffer->num_elements;
-      memcpy(pos, elements, sizeof(float) * buffer->num_elements);
+      memcpy(pos, elements, buffer->stride);
+      pos += buffer->stride;
       buffer->num_verts++;
    }
 }
-- 
cgit v1.2.3


From 1b1c15a54aab7971d8296dc184a004dfd7bb5b3c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 10 Jan 2011 22:09:41 +0100
Subject: r600g: place buffers into different domains, based on usage flags

---
 src/gallium/winsys/r600/drm/r600_bo.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_bo.c b/src/gallium/winsys/r600/drm/r600_bo.c
index 6a3737f0a4a..339d5dc47f4 100644
--- a/src/gallium/winsys/r600/drm/r600_bo.c
+++ b/src/gallium/winsys/r600/drm/r600_bo.c
@@ -63,12 +63,25 @@ struct r600_bo *r600_bo(struct radeon *radeon,
 	 * and are used for uploads and downloads from regular
 	 * resources.  We generate them internally for some transfers.
 	 */
-	if (usage == PIPE_USAGE_STAGING)
-		bo->domains = RADEON_GEM_DOMAIN_CPU | RADEON_GEM_DOMAIN_GTT;
-	else
-		bo->domains = (RADEON_GEM_DOMAIN_CPU |
+	switch (usage) {
+        case PIPE_USAGE_DEFAULT:
+		bo->domains = RADEON_GEM_DOMAIN_CPU |
 				RADEON_GEM_DOMAIN_GTT |
-				RADEON_GEM_DOMAIN_VRAM);
+				RADEON_GEM_DOMAIN_VRAM;
+                break;
+
+        case PIPE_USAGE_DYNAMIC:
+        case PIPE_USAGE_STREAM:
+        case PIPE_USAGE_STAGING:
+		bo->domains = RADEON_GEM_DOMAIN_CPU |
+                                RADEON_GEM_DOMAIN_GTT;
+		break;
+
+        case PIPE_USAGE_STATIC:
+        case PIPE_USAGE_IMMUTABLE:
+		bo->domains = RADEON_GEM_DOMAIN_VRAM;
+		break;
+        }
 
 	pipe_reference_init(&bo->reference, 1);
 	return bo;
-- 
cgit v1.2.3


From b725bbebae19890ceaaa31c1d7fb7f155ac3b6ef Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 10 Jan 2011 22:19:14 +0100
Subject: [g3dvl] static usage for intermediate buffer

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 922bff73717..3b94e104b44 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -888,7 +888,7 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    template.width0 = renderer->buffer_width;
    template.height0 = renderer->buffer_height;
    template.depth0 = 1;
-   template.usage = PIPE_USAGE_DYNAMIC;
+   template.usage = PIPE_USAGE_STATIC;
    template.bind = PIPE_BIND_SAMPLER_VIEW;
    template.flags = 0;
 
-- 
cgit v1.2.3


From 7965e2fc16853ae0ed0a9cde77346c25feb19b6e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 11 Jan 2011 21:54:30 +0100
Subject: [g3dvl] merge fix

---
 src/gallium/include/pipe/p_video_context.h | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 7869f70058d..73f03d58ea6 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #ifndef PIPE_VIDEO_CONTEXT_H
@@ -106,19 +106,17 @@ struct pipe_video_context
                           struct pipe_fence_handle      **fence);
 
    void (*clear_render_target)(struct pipe_video_context *vpipe,
-                        struct pipe_surface *dst,
-                        unsigned dstx, unsigned dsty,
-						const float *rgba,
-                        unsigned width, unsigned height);
+                               struct pipe_surface *dst,
+                               unsigned dstx, unsigned dsty,
+                               const float *rgba,
+                               unsigned width, unsigned height);
 
    void (*resource_copy_region)(struct pipe_video_context *vpipe,
-                        struct pipe_resource *dst,
-						struct pipe_subresource subdst,
-                        unsigned dstx, unsigned dsty, unsigned dstz,
-                        struct pipe_resource *src,
-						struct pipe_subresource subsrc,
-                        unsigned srcx, unsigned srcy, unsigned srcz,
-                        unsigned width, unsigned height);
+                                struct pipe_resource *dst,
+                                unsigned dstx, unsigned dsty, unsigned dstz,
+                                struct pipe_resource *src,
+                                unsigned srcx, unsigned srcy, unsigned srcz,
+                                unsigned width, unsigned height);
 
    struct pipe_transfer *(*get_transfer)(struct pipe_video_context *vpipe,
                                          struct pipe_resource *resource,
-- 
cgit v1.2.3


From f013b4f8f1329982727691a55cc263e3011d02bf Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 29 Jan 2011 12:10:37 +0100
Subject: r600g: bugfixing register remapping

---
 src/gallium/drivers/r600/r600_asm.c | 368 ++++++++++++++++++++----------------
 1 file changed, 206 insertions(+), 162 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 935b8454560..f4ff2fc3d43 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -263,10 +263,10 @@ static void r600_bc_move_cf(struct r600_bc *bc, struct r600_bc_cf *cf, struct r6
 {
 	struct r600_bc_cf *prev = LIST_ENTRY(struct r600_bc_cf, next->list.prev, list);
 	unsigned old_id = cf->id;
-	unsigned new_id = prev->id + 2;
+	unsigned new_id = next->list.prev == &bc->cf ? 0 : prev->id + 2;
 	struct r600_bc_cf *other;
 
-	if (prev == cf)
+	if (prev == cf || next == cf)
 		return; /* position hasn't changed */
 
 	LIST_DEL(&cf->list);
@@ -1189,21 +1189,19 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 	if (!bc->cf_last->curr_bs_head) {
 		bc->cf_last->curr_bs_head = nalu;
 	}
-	/* replace special constants */
+	/* number of gpr == the last gpr used in any alu */
 	for (i = 0; i < 3; i++) {
+		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
+			bc->ngpr = nalu->src[i].sel + 1;
+		}
 		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
 			r600_bc_special_constants(
 				nalu->src[i].value[nalu->src[i].chan],
 				&nalu->src[i].sel, &nalu->src[i].neg);
-
-		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
-			bc->ngpr = nalu->src[i].sel + 1;
-		}
 	}
 	if (nalu->dst.sel >= bc->ngpr) {
 		bc->ngpr = nalu->dst.sel + 1;
 	}
-
 	LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
 	/* each alu use 2 dwords */
 	bc->cf_last->ndw += 2;
@@ -1474,8 +1472,8 @@ enum cf_class
 	CF_CLASS_EXPORT,
 	CF_CLASS_OTHER
 };
-
-static enum cf_class get_cf_class(struct r600_bc_cf *cf)
+ 
+static enum cf_class r600_bc_cf_class(struct r600_bc_cf *cf)
 {
 	switch (cf->inst) {
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
@@ -1493,6 +1491,8 @@ static enum cf_class get_cf_class(struct r600_bc_cf *cf)
 
 	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
 	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+	case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+	case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
 		return CF_CLASS_EXPORT;
 
 	case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
@@ -1518,7 +1518,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	unsigned id = cf->id;
 	unsigned end_of_program = bc->cf.prev == &cf->list;
 
-	switch (get_cf_class(cf)) {
+	switch (r600_bc_cf_class(cf)) {
 	case CF_CLASS_ALU:
 		assert(!end_of_program);
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
@@ -1574,81 +1574,103 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 
 struct gpr_usage_range {
 	int	replacement;
-	int32_t	start;
-	int32_t	end;
+	int	rel_block;
+	int	start;
+	int	end;
 };
 
 struct gpr_usage {
 	unsigned		channels:4;
-	int32_t			first_write;
-	int32_t			last_write[4];
+	int			first_write;
+	int			last_write[4];
 	unsigned	        nranges;
 	struct gpr_usage_range  *ranges;
 };
 
+static struct gpr_usage_range* last_gpr_usage_range(struct gpr_usage *usage)
+{
+	if (usage->nranges)
+		return usage->ranges + usage->nranges - 1;
+	else
+		return NULL;
+}
+
 static struct gpr_usage_range* add_gpr_usage_range(struct gpr_usage *usage)
 {
+	struct gpr_usage_range *range;
+
 	usage->nranges++;
 	usage->ranges = realloc(usage->ranges, usage->nranges * sizeof(struct gpr_usage_range));
 	if (!usage->ranges)
 		return NULL;
-	return &usage->ranges[usage->nranges-1];
+
+	range = last_gpr_usage_range(usage);
+	range->replacement = -1; /* no prefered replacement */
+	range->rel_block = -1;
+	range->start = -1;
+	range->end = -1;
+
+	return range;
 }
 
-static void notice_gpr_read(struct gpr_usage *usage, int32_t id, unsigned chan)
+static void notice_gpr_read(struct gpr_usage *usage, int id, unsigned chan)
 {
+	struct gpr_usage_range* range;
+
         usage->channels |= 1 << chan;
         usage->first_write = -1;
         if (!usage->nranges) {
-        	struct gpr_usage_range* range = add_gpr_usage_range(usage);
-        	range->replacement = -1;
-                range->start = -1;
-                range->end = -1;
-        }
-        if (usage->ranges[usage->nranges-1].end < id)
-		usage->ranges[usage->nranges-1].end = id;
+        	range = add_gpr_usage_range(usage);
+        } else
+		range = last_gpr_usage_range(usage);
+
+        if (range && range->end < id)
+		range->end = id;
 }
 
-static void notice_gpr_rel_read(struct gpr_usage usage[128], int32_t id, unsigned chan)
+static void notice_gpr_rel_read(struct r600_bc *bc, struct gpr_usage usage[128],
+				int id, unsigned gpr, unsigned chan)
 {
 	unsigned i;
-	for (i = 0; i < 128; ++i)
+	for (i = gpr; i < bc->ngpr; ++i)
 		notice_gpr_read(&usage[i], id, chan);
+
+	last_gpr_usage_range(&usage[gpr])->rel_block = bc->ngpr - gpr;
 }
 
-static void notice_gpr_last_write(struct gpr_usage *usage, int32_t id, unsigned chan)
+static void notice_gpr_last_write(struct gpr_usage *usage, int id, unsigned chan)
 {
         usage->last_write[chan] = id;
 }
 
-static void notice_gpr_write(struct gpr_usage *usage, int32_t id, unsigned chan,
+static void notice_gpr_write(struct gpr_usage *usage, int id, unsigned chan,
 				int predicate, int prefered_replacement)
 {
-	int32_t start = usage->first_write != -1 ? usage->first_write : id;
+	struct gpr_usage_range* last_range = last_gpr_usage_range(usage);
+	int start = usage->first_write != -1 ? usage->first_write : id;
 	usage->channels &= ~(1 << chan);
 	if (usage->channels) {
 		if (usage->first_write == -1)
 			usage->first_write = id;
-	} else if (!usage->nranges || (usage->ranges[usage->nranges-1].start != start && !predicate)) {
+	} else if (!last_range || (last_range->start != start && !predicate)) {
 		usage->first_write = start;
 		struct gpr_usage_range* range = add_gpr_usage_range(usage);
 		range->replacement = prefered_replacement;
                 range->start = start;
-                range->end = -1;
-        } else if (usage->ranges[usage->nranges-1].start == start && prefered_replacement != -1) {
-        	usage->ranges[usage->nranges-1].replacement = prefered_replacement;
+        } else if (last_range->start == start && prefered_replacement != -1) {
+        	last_range->replacement = prefered_replacement;
         }
         notice_gpr_last_write(usage, id, chan);
 }
 
-static void notice_gpr_rel_last_write(struct gpr_usage usage[128], int32_t id, unsigned chan)
+static void notice_gpr_rel_last_write(struct gpr_usage usage[128], int id, unsigned chan)
 {
 	unsigned i;
 	for (i = 0; i < 128; ++i)
 		notice_gpr_last_write(&usage[i], id, chan);
 }
 
-static void notice_gpr_rel_write(struct gpr_usage usage[128], int32_t id, unsigned chan)
+static void notice_gpr_rel_write(struct gpr_usage usage[128], int id, unsigned chan)
 {
 	unsigned i;
 	for (i = 0; i < 128; ++i)
@@ -1656,7 +1678,7 @@ static void notice_gpr_rel_write(struct gpr_usage usage[128], int32_t id, unsign
 }
 
 static void notice_alu_src_gprs(struct r600_bc *bc, struct r600_bc_alu *alu,
-                                struct gpr_usage usage[128], int32_t id)
+                                struct gpr_usage usage[128], int id)
 {
 	unsigned src, num_src;
 
@@ -1667,14 +1689,14 @@ static void notice_alu_src_gprs(struct r600_bc *bc, struct r600_bc_alu *alu,
 			continue;
 
 		if (alu->src[src].rel)
-			notice_gpr_rel_read(usage, id, alu->src[src].chan);
+			notice_gpr_rel_read(bc, usage, id, alu->src[src].sel, alu->src[src].chan);
 		else
 			notice_gpr_read(&usage[alu->src[src].sel], id, alu->src[src].chan);
 	}
 }
 
 static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage usage[128],
-				int32_t id, int predicate)
+				int id, int predicate)
 {
 	struct r600_bc_alu *alu;
 	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
@@ -1693,18 +1715,19 @@ static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage
 	}
 }
 
-static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128],
-				int32_t id, int predicate)
+static void notice_tex_gprs(struct r600_bc *bc, struct r600_bc_tex *tex,
+				struct gpr_usage usage[128],
+				int id, int predicate)
 {
 	if (tex->src_rel) {
                 if (tex->src_sel_x < 4)
-			notice_gpr_rel_read(usage, id, tex->src_sel_x);
+			notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_x);
 		if (tex->src_sel_y < 4)
-			notice_gpr_rel_read(usage, id, tex->src_sel_y);
+			notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_y);
 		if (tex->src_sel_z < 4)
-			notice_gpr_rel_read(usage, id, tex->src_sel_z);
+			notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_z);
 		if (tex->src_sel_w < 4)
-			notice_gpr_rel_read(usage, id, tex->src_sel_w);
+			notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_w);
         } else {
 		if (tex->src_sel_x < 4)
 			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_x);
@@ -1737,7 +1760,7 @@ static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128]
 }
 
 static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
-				int32_t id, int predicate)
+				int id, int predicate)
 {
 	notice_gpr_read(&usage[vtx->src_gpr], id, vtx->src_sel_x);
 
@@ -1752,11 +1775,14 @@ static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128]
 }
 
 static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
-				struct r600_bc_cf *export_cf[128], int32_t export_remap[128])
+				struct r600_bc_cf *export_cf[128], int export_remap[128])
 {
 	//TODO handle other memory operations
 	struct gpr_usage *output = &usage[cf->output.gpr];
-	int32_t id = (output->last_write[0] + 0x100) & ~0xFF;
+	int id = MAX4(output->last_write[0], output->last_write[1],
+		output->last_write[2], output->last_write[3]);
+	id += 0x100;
+	id &= ~0xFF;
 
 	export_cf[cf->output.gpr] = cf;
 	export_remap[cf->output.gpr] = id;
@@ -1770,7 +1796,7 @@ static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128
 		notice_gpr_read(output, id, cf->output.swizzle_w);
 }
 
-static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t id)
+static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int id)
 {
 	unsigned i;
 	for (i = 0; i < usage->nranges; ++i) {
@@ -1782,21 +1808,20 @@ static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t i
 	return NULL;
 }
 
-static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int32_t id)
+static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int id)
 {
 	unsigned i;
 	for (i = 0; i < usage->nranges; ++i) {
 		struct gpr_usage_range* range = &usage->ranges[i];
-		int32_t end = range->end;
+		int end = range->end;
 
 		if (range->start <= id && (id < end || end == -1))
 			return range;
 	}
-	assert(0); /* should not happen */
 	return NULL;
 }
 
-static int is_barrier_needed(struct gpr_usage *usage, int32_t id, unsigned chan, int32_t last_barrier)
+static int is_barrier_needed(struct gpr_usage *usage, int id, unsigned chan, int last_barrier)
 {
 	if (usage->last_write[chan] != (id & ~0xFF))
 		return usage->last_write[chan] >= last_barrier;
@@ -1809,33 +1834,47 @@ static int is_intersection(struct gpr_usage_range* a, struct gpr_usage_range* b)
 	return a->start <= b->end && b->start < a->end;
 }
 
-static int rate_replacement(struct gpr_usage *usage, struct gpr_usage_range* range)
+static int rate_replacement(struct gpr_usage usage[128], unsigned current, unsigned gpr,
+				struct gpr_usage_range* range)
 {
+	int max_gpr = gpr + MAX2(range->rel_block, 1);
+	int best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF;
 	unsigned i;
-	int32_t best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF;
 
-	for (i = 0; i < usage->nranges; ++i) {
-		if (usage->ranges[i].replacement != -1)
-			continue; /* ignore already remapped ranges */
+	for (; gpr < max_gpr; ++gpr) {
+
+		if (gpr >= 128) /* relative gpr block won't fit into clause temporaries */
+			return -1; /* forget it */
+
+		if (gpr == current) /* ignore ranges of to be replaced register */
+			continue;
+
+		for (i = 0; i < usage[gpr].nranges; ++i) {
+			if (usage[gpr].ranges[i].replacement < gpr)
+				continue; /* ignore already remapped ranges */
 
-		if (is_intersection(&usage->ranges[i], range))
-			return -1; /* forget it if usages overlap */
+			if (is_intersection(&usage[gpr].ranges[i], range))
+				return -1; /* forget it if usages overlap */
 
-		if (range->start >= usage->ranges[i].end)
-			best_start = MIN2(best_start, range->start - usage->ranges[i].end);
+			if (range->start >= usage[gpr].ranges[i].end)
+				best_start = MIN2(best_start, range->start - usage[gpr].ranges[i].end);
 
-		if (range->end != -1 && range->end <= usage->ranges[i].start)
-			best_end = MIN2(best_end, usage->ranges[i].start - range->end);
+			if (range->end != -1 && range->end <= usage[gpr].ranges[i].start)
+				best_end = MIN2(best_end, usage[gpr].ranges[i].start - range->end);
+		}
 	}
 	return best_start + best_end;
 }
 
 static void find_replacement(struct gpr_usage usage[128], unsigned current,
-				struct gpr_usage_range *range, int is_export)
+				struct gpr_usage_range *range)
 {
-	unsigned i;
+	unsigned i, j;
 	int best_gpr = -1, best_rate = 0x7FFFFFFF;
 
+	if (range->replacement == current) 
+		return; /* register prefers to be not remapped */
+
 	if (range->replacement != -1 && range->replacement <= current) {
 		struct gpr_usage_range *other = find_src_range(&usage[range->replacement], range->start);
 		if (other && other->replacement != -1)
@@ -1843,7 +1882,7 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current,
 	}
 
 	if (range->replacement != -1 && range->replacement < current) {
-		int rate = rate_replacement(&usage[range->replacement], range);
+		int rate = rate_replacement(usage, current, range->replacement, range);
 
 		/* check if prefered replacement can be used */
 		if (rate != -1) {
@@ -1854,9 +1893,9 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current,
 
 	if (best_gpr == -1 && (range->start & ~0xFF) == (range->end & ~0xFF)) {
 		/* register is just used inside one ALU clause */
-		/* try to use clause temporaryis for it */
+		/* try to use clause temporaries for it */
 		for (i = 127; i > 123; --i) {
-			int rate = rate_replacement(&usage[i], range);
+			int rate = rate_replacement(usage, current, i, range);
 
 			if (rate == -1) /* can't be used because ranges overlap */
 				continue;
@@ -1866,7 +1905,7 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current,
 				best_gpr = i;
 
 				/* can't get better than this */
-				if (rate == 0 || is_export)
+				if (rate == 0)
 					break;
 			}
 		}
@@ -1874,7 +1913,7 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current,
 
 	if (best_gpr == -1) {
 		for (i = 0; i < current; ++i) {
-			int rate = rate_replacement(&usage[i], range);
+			int rate = rate_replacement(usage, current, i, range);
 
 			if (rate == -1) /* can't be used because ranges overlap */
 				continue;
@@ -1890,33 +1929,30 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current,
 		}
 	}
 
-	range->replacement = best_gpr;
 	if (best_gpr != -1) {
 		struct gpr_usage_range *reservation = add_gpr_usage_range(&usage[best_gpr]);
-		reservation->replacement = -1;
+		reservation->replacement = best_gpr;
+		reservation->rel_block = -1;
 		reservation->start = range->start;
 		reservation->end = range->end;
-	}
-}
-
-static void find_export_replacement(struct gpr_usage usage[128],
-				struct gpr_usage_range *range, struct r600_bc_cf *current,
-				struct r600_bc_cf *next, int32_t next_id)
-{
-	if (!next || next_id <= range->start || next_id > range->end)
-		return;
-
-	if (current->output.type != next->output.type)
-		return;
+	} else
+		best_gpr = current;
 
-	if ((current->output.array_base + 1) != next->output.array_base)
-		return;
+	range->replacement = best_gpr;
+	if (range->rel_block == -1)
+		return; /* no relative block to handle we are done here */
 
-	find_src_range(&usage[next->output.gpr], next_id)->replacement = range->replacement + 1;
+	/* set prefered register for the whole relative register block */
+	for (i = current + 1, ++best_gpr; i < current + range->rel_block; ++i, ++best_gpr) {
+		for (j = 0; j < usage[i].nranges; ++j) {
+			if (is_intersection(&usage[i].ranges[j], range))
+				usage[i].ranges[j].replacement = best_gpr;
+		}
+	}
 }
 
 static void replace_alu_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, struct gpr_usage usage[128],
-				int32_t id, int32_t last_barrier, unsigned *barrier)
+				int id, int last_barrier, unsigned *barrier)
 {
 	struct gpr_usage *cur_usage;
 	struct gpr_usage_range *range;
@@ -1930,8 +1966,7 @@ static void replace_alu_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, struct
 
 		cur_usage = &usage[alu->src[src].sel];
 		range = find_src_range(cur_usage, id);
-		if (range->replacement != -1)
-			alu->src[src].sel = range->replacement;
+		alu->src[src].sel = range->replacement;
 
 		*barrier |= is_barrier_needed(cur_usage, id, alu->src[src].chan, last_barrier);
 	}
@@ -1939,15 +1974,18 @@ static void replace_alu_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, struct
 	if (alu->dst.write) {
 		cur_usage = &usage[alu->dst.sel];
 		range = find_dst_range(cur_usage, id);
-		if (range->replacement == alu->dst.sel) {
+		if (!range || range->replacement == -1) {
 			if (!alu->is_op3)
 				alu->dst.write = 0;
 			else
 				/*TODO: really check that register 123 is useable */
 				alu->dst.sel = 123;
-		} else if (range->replacement != -1) {
+		} else {
 			alu->dst.sel = range->replacement;
+			*barrier |= is_barrier_needed(cur_usage, id, alu->dst.chan, last_barrier);
 		}
+	}
+	if (alu->dst.write) {
 		if (alu->dst.rel)
 			notice_gpr_rel_last_write(usage, id, alu->dst.chan);
 		else
@@ -1956,7 +1994,7 @@ static void replace_alu_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, struct
 }
 
 static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128],
-				int32_t id, int32_t last_barrier, unsigned *barrier)
+				int id, int last_barrier, unsigned *barrier)
 {
 	struct gpr_usage *cur_usage = &usage[tex->src_gpr];
 	struct gpr_usage_range *range = find_src_range(cur_usage, id);
@@ -1973,64 +2011,68 @@ static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128
 		if (tex->src_sel_w < 4)
 			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_w, last_barrier);
 	}
-
-	if (range->replacement != -1)
-		tex->src_gpr = range->replacement;
+	tex->src_gpr = range->replacement;
 
 	cur_usage = &usage[tex->dst_gpr];
+
 	range = find_dst_range(cur_usage, id);
-	if (range->replacement != -1)
+	if (range) {
 		tex->dst_gpr = range->replacement;
 
-	if (tex->dst_rel) {
-		if (tex->dst_sel_x != 7)
-			notice_gpr_rel_last_write(usage, id, tex->dst_sel_x);
-		if (tex->dst_sel_y != 7)
-			notice_gpr_rel_last_write(usage, id, tex->dst_sel_y);
-		if (tex->dst_sel_z != 7)
-			notice_gpr_rel_last_write(usage, id, tex->dst_sel_z);
-		if (tex->dst_sel_w != 7)
-			notice_gpr_rel_last_write(usage, id, tex->dst_sel_w);
+		if (tex->dst_rel) {
+			if (tex->dst_sel_x != 7)
+				notice_gpr_rel_last_write(usage, id, tex->dst_sel_x);
+			if (tex->dst_sel_y != 7)
+				notice_gpr_rel_last_write(usage, id, tex->dst_sel_y);
+			if (tex->dst_sel_z != 7)
+				notice_gpr_rel_last_write(usage, id, tex->dst_sel_z);
+			if (tex->dst_sel_w != 7)
+				notice_gpr_rel_last_write(usage, id, tex->dst_sel_w);
+		} else {
+			if (tex->dst_sel_x != 7)
+				notice_gpr_last_write(cur_usage, id, tex->dst_sel_x);
+			if (tex->dst_sel_y != 7)
+				notice_gpr_last_write(cur_usage, id, tex->dst_sel_y);
+			if (tex->dst_sel_z != 7)
+				notice_gpr_last_write(cur_usage, id, tex->dst_sel_z);
+			if (tex->dst_sel_w != 7)
+				notice_gpr_last_write(cur_usage, id, tex->dst_sel_w);
+		}
 	} else {
-		if (tex->dst_sel_x != 7)
-			notice_gpr_last_write(cur_usage, id, tex->dst_sel_x);
-		if (tex->dst_sel_y != 7)
-			notice_gpr_last_write(cur_usage, id, tex->dst_sel_y);
-		if (tex->dst_sel_z != 7)
-			notice_gpr_last_write(cur_usage, id, tex->dst_sel_z);
-		if (tex->dst_sel_w != 7)
-			notice_gpr_last_write(cur_usage, id, tex->dst_sel_w);
+		tex->dst_gpr = 123;
 	}
 }
 
 static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
-				int32_t id, int32_t last_barrier, unsigned *barrier)
+				int id, int last_barrier, unsigned *barrier)
 {
 	struct gpr_usage *cur_usage = &usage[vtx->src_gpr];
 	struct gpr_usage_range *range = find_src_range(cur_usage, id);
 
 	*barrier |= is_barrier_needed(cur_usage, id, vtx->src_sel_x, last_barrier);
 
-	if (range->replacement != -1)
-		vtx->src_gpr = range->replacement;
+	vtx->src_gpr = range->replacement;
 
 	cur_usage = &usage[vtx->dst_gpr];
 	range = find_dst_range(cur_usage, id);
-	if (range->replacement != -1)
+	if (range) {
 		vtx->dst_gpr = range->replacement;
 
-	if (vtx->dst_sel_x != 7)
-		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_x);
-	if (vtx->dst_sel_y != 7)
-		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_y);
-	if (vtx->dst_sel_z != 7)
-		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_z);
-	if (vtx->dst_sel_w != 7)
-		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_w);
+		if (vtx->dst_sel_x != 7)
+			notice_gpr_last_write(cur_usage, id, vtx->dst_sel_x);
+		if (vtx->dst_sel_y != 7)
+			notice_gpr_last_write(cur_usage, id, vtx->dst_sel_y);
+		if (vtx->dst_sel_z != 7)
+			notice_gpr_last_write(cur_usage, id, vtx->dst_sel_z);
+		if (vtx->dst_sel_w != 7)
+			notice_gpr_last_write(cur_usage, id, vtx->dst_sel_w);
+	} else {
+		vtx->dst_gpr = 123;
+	}
 }
 
 static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
-				int32_t id, int32_t last_barrier)
+				int id, int last_barrier)
 {
 	//TODO handle other memory operations
 	struct gpr_usage *cur_usage = &usage[cf->output.gpr];
@@ -2046,8 +2088,7 @@ static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[12
 	if (cf->output.swizzle_w < 4)
 		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_w, last_barrier);
 
-	if (range->replacement != -1)
-		cf->output.gpr = range->replacement;
+	cf->output.gpr = range->replacement;
 }
 
 static void optimize_alu_inst(struct r600_bc *bc, struct r600_bc_cf *cf, struct r600_bc_alu *alu)
@@ -2138,9 +2179,9 @@ static void r600_bc_optimize(struct r600_bc *bc)
 
 	/* assume that each gpr is exported only once */
 	struct r600_bc_cf *export_cf[128] = { NULL };
-	int32_t export_remap[128];
+	int export_remap[128];
 
-	int32_t id, barrier[bc->nstack];
+	int id, cond_start, barrier[bc->nstack];
 	unsigned i, j, stack, predicate, old_stack;
 
 	memset(&usage, 0, sizeof(usage));
@@ -2155,7 +2196,11 @@ static void r600_bc_optimize(struct r600_bc *bc)
 	/* first gather some informations about the gpr usage */
 	id = 0; stack = 0;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
-		switch (get_cf_class(cf)) {
+		old_stack = stack;
+		if (stack == 0)
+			cond_start = stack;
+
+		switch (r600_bc_cf_class(cf)) {
 		case CF_CLASS_ALU:
 			predicate = 0;
 			first = NULL;
@@ -2180,7 +2225,7 @@ static void r600_bc_optimize(struct r600_bc *bc)
 			break;
 		case CF_CLASS_TEXTURE:
 			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
-				notice_tex_gprs(tex, usage, id++, stack > 0);
+				notice_tex_gprs(bc, tex, usage, id++, stack > 0);
 			}
 			break;
 		case CF_CLASS_VERTEX:
@@ -2207,6 +2252,14 @@ static void r600_bc_optimize(struct r600_bc *bc)
 				goto out;
 			}
 		}
+
+		/* extend last_write after conditional block */
+		if (stack == 0 && old_stack != 0)
+			for (i = 0; i < 128; ++i)
+				for (j = 0; j < 4; ++j)
+					if (usage[i].last_write[j] >= cond_start)
+						usage[i].last_write[j] = id;
+
 		id += 0x100;
 	        id &= ~0xFF;
 	}
@@ -2216,26 +2269,19 @@ static void r600_bc_optimize(struct r600_bc *bc)
 	for (i = 0; i < 124; ++i) {
 		for (j = 0; j < usage[i].nranges; ++j) {
 			struct gpr_usage_range *range = &usage[i].ranges[j];
-			int is_export = export_cf[i] && export_cf[i + 1] &&
-				range->start < export_remap[i] &&
-				export_remap[i] <= range->end;
-
 			if (range->start == -1)
-				range->replacement = -1;
-			else if (range->end == -1)
+				/* can't rearrange shader inputs */
 				range->replacement = i;
+			else if (range->end == -1)
+				/* gpr isn't used any more after this instruction */
+				range->replacement = -1;
 			else
-				find_replacement(usage, i, range, is_export);
+				find_replacement(usage, i, range);
 
-			if (range->replacement == -1)
+			if (range->replacement == i)
 				bc->ngpr = i;
 			else if (range->replacement < i && range->replacement > bc->ngpr)
 				bc->ngpr = range->replacement;
-
-			if (is_export && range->replacement != -1) {
-				find_export_replacement(usage, range, export_cf[i],
-							export_cf[i + 1], export_remap[i + 1]);
-			}
 		}
 	}
 	bc->ngpr++;
@@ -2251,7 +2297,7 @@ static void r600_bc_optimize(struct r600_bc *bc)
 	id = 0; stack = 0;
 	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
 		old_stack = stack;
-		switch (get_cf_class(cf)) {
+		switch (r600_bc_cf_class(cf)) {
 		case CF_CLASS_ALU:
 			predicate = 0;
 			first = NULL;
@@ -2308,7 +2354,7 @@ static void r600_bc_optimize(struct r600_bc *bc)
 			barrier[i] = barrier[old_stack];
 
 		id += 0x100;
-		if (stack != 0) /* ensue exports are placed outside of conditional blocks */
+		if (stack != 0) /* ensure exports are placed outside of conditional blocks */
 			continue;
 
 		for (i = 0; i < 128; ++i) {
@@ -2350,13 +2396,13 @@ int r600_bc_build(struct r600_bc *bc)
 		bc->nstack = 1;
 	}
 
-	r600_bc_optimize(bc);
+	//r600_bc_optimize(bc);
 
 	/* first path compute addr of each CF block */
 	/* addr start after all the CF instructions */
 	addr = LIST_ENTRY(struct r600_bc_cf, bc->cf.prev, list)->id + 2;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
-		switch (get_cf_class(cf)) {
+		switch (r600_bc_cf_class(cf)) {
 		case CF_CLASS_ALU:
 			break;
 		case CF_CLASS_TEXTURE:
@@ -2365,7 +2411,6 @@ int r600_bc_build(struct r600_bc *bc)
 			addr += 3;
 			addr &= 0xFFFFFFFCUL;
 			break;
-			break;
 		case CF_CLASS_EXPORT:
 			if (cf->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT))
 				exports[cf->output.type] = cf;
@@ -2400,7 +2445,7 @@ int r600_bc_build(struct r600_bc *bc)
 			r = r600_bc_cf_build(bc, cf);
 		if (r)
 			return r;
-		switch (get_cf_class(cf)) {
+		switch (r600_bc_cf_class(cf)) {
 		case CF_CLASS_ALU:
 			nliteral = 0;
 			memset(literal, 0, sizeof(literal));
@@ -2526,7 +2571,7 @@ void r600_bc_dump(struct r600_bc *bc)
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		id = cf->id;
 
-		switch (get_cf_class(cf)) {
+		switch (r600_bc_cf_class(cf)) {
 		case CF_CLASS_ALU:
 			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
 			fprintf(stderr, "ADDR:%04d ", cf->addr);
@@ -2564,7 +2609,6 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
 			fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
 			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
-			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
 			fprintf(stderr, "BARRIER:%d ", cf->barrier);
 			fprintf(stderr, "INST:%d ", cf->inst);
 			fprintf(stderr, "BURST_COUNT:%d\n", cf->output.burst_count);
@@ -2649,21 +2693,21 @@ void r600_cf_vtx(struct r600_vertex_element *ve, u32 *bytecode, unsigned count)
 	if (count > 8) {
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-						S_SQ_CF_WORD1_BARRIER(0) |
+						S_SQ_CF_WORD1_BARRIER(1) |
 						S_SQ_CF_WORD1_COUNT(8 - 1);
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-						S_SQ_CF_WORD1_BARRIER(0) |
+						S_SQ_CF_WORD1_BARRIER(1) |
 						S_SQ_CF_WORD1_COUNT(count - 8 - 1);
 	} else {
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX) |
-						S_SQ_CF_WORD1_BARRIER(0) |
+						S_SQ_CF_WORD1_BARRIER(1) |
 						S_SQ_CF_WORD1_COUNT(count - 1);
 	}
 	bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
 	bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
-			S_SQ_CF_WORD1_BARRIER(0);
+			S_SQ_CF_WORD1_BARRIER(1);
 
 	rstate = &ve->rstate;
 	rstate->id = R600_PIPE_STATE_FETCH_SHADER;
@@ -2685,21 +2729,21 @@ void r600_cf_vtx_tc(struct r600_vertex_element *ve, u32 *bytecode, unsigned coun
 	if (count > 8) {
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
-						S_SQ_CF_WORD1_BARRIER(0) |
+						S_SQ_CF_WORD1_BARRIER(1) |
 						S_SQ_CF_WORD1_COUNT(8 - 1);
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(40 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
-						S_SQ_CF_WORD1_BARRIER(0) |
+						S_SQ_CF_WORD1_BARRIER(1) |
 						S_SQ_CF_WORD1_COUNT((count - 8) - 1);
 	} else {
 		bytecode[i++] = S_SQ_CF_WORD0_ADDR(8 >> 1);
 		bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC) |
-						S_SQ_CF_WORD1_BARRIER(0) |
+						S_SQ_CF_WORD1_BARRIER(1) |
 						S_SQ_CF_WORD1_COUNT(count - 1);
 	}
 	bytecode[i++] = S_SQ_CF_WORD0_ADDR(0);
 	bytecode[i++] = S_SQ_CF_WORD1_CF_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN) |
-			S_SQ_CF_WORD1_BARRIER(0);
+			S_SQ_CF_WORD1_BARRIER(1);
 
 	rstate = &ve->rstate;
 	rstate->id = R600_PIPE_STATE_FETCH_SHADER;
-- 
cgit v1.2.3


From 77217af40d67612d1f1089ca188393d27a8a038f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 24 Feb 2011 22:28:38 +0100
Subject: r600g: Merge fix

---
 src/gallium/drivers/r600/r600_texture.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 03af367401d..4c9d5609c06 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -309,7 +309,16 @@ static boolean permit_hardware_blit(struct pipe_screen *screen,
 				PIPE_BIND_SAMPLER_VIEW, 0))
 		return FALSE;
 
-	return TRUE;
+        switch (res->usage) {
+        case PIPE_USAGE_STREAM:
+        case PIPE_USAGE_STAGING:
+        case PIPE_USAGE_STATIC:
+        case PIPE_USAGE_IMMUTABLE:
+                return FALSE;
+
+        default:
+                return TRUE;
+        }
 }
 
 static boolean r600_texture_get_handle(struct pipe_screen* screen,
-- 
cgit v1.2.3


From ed12c29bc45b100b758c9affe2cebe8c8498e25e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 2 Mar 2011 20:48:03 +0100
Subject: r600g: merge fix

---
 src/gallium/drivers/r600/r600_asm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 8006e9b9a58..3e478382801 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1529,7 +1529,7 @@ enum cf_class
 	CF_CLASS_EXPORT,
 	CF_CLASS_OTHER
 };
- 
+
 static enum cf_class r600_bc_cf_class(struct r600_bc_cf *cf)
 {
 	switch (cf->inst) {
@@ -1561,6 +1561,7 @@ static enum cf_class r600_bc_cf_class(struct r600_bc_cf *cf)
 	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
 	case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
 	case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+	case V_SQ_CF_WORD1_SQ_CF_INST_NOP:
 		return CF_CLASS_OTHER;
 
 	default:
@@ -1929,7 +1930,7 @@ static void find_replacement(struct gpr_usage usage[128], unsigned current,
 	unsigned i, j;
 	int best_gpr = -1, best_rate = 0x7FFFFFFF;
 
-	if (range->replacement == current) 
+	if (range->replacement == current)
 		return; /* register prefers to be not remapped */
 
 	if (range->replacement != -1 && range->replacement <= current) {
@@ -3079,6 +3080,7 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 	}
 
 	r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
+	r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_NOP));
 
 	/* use PIPE_BIND_VERTEX_BUFFER so we use the cache buffer manager */
 	ve->fetch_shader = r600_bo(rctx->radeon, bc.ndw*4, 256, PIPE_BIND_VERTEX_BUFFER, 0);
-- 
cgit v1.2.3


From 4ea38176028a6ecfc6ed195f64429b6b34279359 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 5 Mar 2011 16:36:38 +0100
Subject: [g3dvl] use instanced drawing to reduce the vertex buffer payload

---
 src/gallium/auxiliary/vl/vl_idct.c               | 4 ++--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 2 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     | 2 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     | 8 ++------
 4 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 89463a5c75c..e1e57431a0b 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -752,7 +752,7 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer)
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
       idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
-      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
+      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_verts);
 
       /* second stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]);
@@ -761,6 +761,6 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer)
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
       idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
-      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts);
+      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_verts);
    }
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 484e781f0cb..2f68ec17042 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1080,7 +1080,7 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
 
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs);
-   util_draw_arrays(renderer->pipe, PIPE_PRIM_QUADS, 0, buffer->num_macroblocks * 4);
+   util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, buffer->num_macroblocks);
 
    renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, buffer->fence);
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 552a0451fef..610f37e1f89 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -106,7 +106,7 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
 
    for ( i = 0; i < num_elements; ++i ) {
       elements[i].src_offset = offset;
-      elements[i].instance_divisor = 0;
+      elements[i].instance_divisor = 1;
       elements[i].vertex_buffer_index = vertex_buffer_index;
       offset += util_format_get_blocksize(elements[i].src_format);
    }
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index b8e8766ec50..ca06abe2027 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -57,16 +57,12 @@ static inline void
 vl_vb_add_block(struct vl_vertex_buffer *buffer, void *elements)
 {
    void *pos;
-   unsigned i;
 
    assert(buffer);
 
    pos = buffer->vectors + buffer->num_verts * buffer->stride;
-   for(i = 0; i < 4; ++i) {
-      memcpy(pos, elements, buffer->stride);
-      pos += buffer->stride;
-      buffer->num_verts++;
-   }
+   memcpy(pos, elements, buffer->stride);
+   buffer->num_verts++;
 }
 
 void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
-- 
cgit v1.2.3


From 43af13b2cb6d865610b3cdf64f8dbf16a9bca215 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 8 Mar 2011 16:48:39 +0100
Subject: r600g: set start instance correctly

---
 src/gallium/drivers/r600/r600_state_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 3c072fe7ca9..43dad0c8023 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -516,7 +516,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 	r600_pipe_state_add_reg(&vgt, R_028404_VGT_MIN_VTX_INDX, draw.info.min_index, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(&vgt, R_028238_CB_TARGET_MASK, rctx->cb_target_mask & mask, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(&vgt, R_03CFF0_SQ_VTX_BASE_VTX_LOC, 0, 0xFFFFFFFF, NULL);
-	r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, 0, 0xFFFFFFFF, NULL);
+	r600_pipe_state_add_reg(&vgt, R_03CFF4_SQ_VTX_START_INST_LOC, draw.info.start_instance, 0xFFFFFFFF, NULL);
 	r600_context_pipe_state_set(&rctx->ctx, &vgt);
 
 	rdraw.vgt_num_indices = draw.info.count;
-- 
cgit v1.2.3


From 310eea52ca1e997295c84163066cc5d0fd4f8cf6 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 8 Mar 2011 18:34:05 +0100
Subject: [g3dvl] use a single vertex buffer for both idct and mc

---
 src/gallium/auxiliary/vl/vl_idct.c               | 162 ++++++------
 src/gallium/auxiliary/vl/vl_idct.h               |  21 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 298 +++++------------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   9 +-
 src/gallium/auxiliary/vl/vl_types.h              |   5 -
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     | 217 +++++++++++++++--
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     |  50 ++--
 7 files changed, 388 insertions(+), 374 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index e1e57431a0b..b418aea9514 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -44,14 +44,6 @@
 
 #define NR_RENDER_TARGETS 4
 
-enum VS_INPUT
-{
-   VS_I_RECT,
-   VS_I_VPOS,
-
-   NUM_VS_INPUTS
-};
-
 enum VS_OUTPUT
 {
    VS_O_VPOS,
@@ -99,13 +91,14 @@ calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
 }
 
 static void *
-create_vert_shader(struct vl_idct *idct, bool matrix_stage)
+create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
 {
    struct ureg_program *shader;
-   struct ureg_src scale;
-   struct ureg_src vrect, vpos;
+   struct ureg_src vrect, vpos, vblock, eb[4];
+   struct ureg_src scale, blocks_xy, t_eb;
    struct ureg_dst t_tex, t_start;
    struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
+   unsigned label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
@@ -116,9 +109,15 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+   vblock = ureg_swizzle(vrect, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
 
+   eb[0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0);
+   eb[1] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
+   eb[2] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
+   eb[3] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
+
    o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
    o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
 
@@ -127,38 +126,74 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
 
    /*
     * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
+    * blocks_xy = (blocks_x, blocks_y)
     *
-    * t_vpos = vpos + vrect
-    * o_vpos.xy = t_vpos * scale
-    * o_vpos.zw = vpos
+    * ar = vblock.y * blocks.x + vblock.x
+    * if eb[ar].(color_swizzle)
+    *    o_vpos.xy = -1
+    * else
+    *    t_tex = vpos * blocks_xy + vblock
+    *    t_start = t_tex * scale
+    *    t_tex = t_tex + vrect
+    *    o_vpos.xy = t_tex * scale
     *
-    * o_l_addr = calc_addr(...)
-    * o_r_addr = calc_addr(...)
+    *    o_l_addr = calc_addr(...)
+    *    o_r_addr = calc_addr(...)
+    * endif
+    * o_vpos.zw = vpos
     *
     */
+
    scale = ureg_imm2f(shader,
       (float)BLOCK_WIDTH / idct->buffer_width,
       (float)BLOCK_HEIGHT / idct->buffer_height);
 
-   ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
-   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
-   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
-      ureg_scalar(vrect, TGSI_SWIZZLE_X),
-      ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));
+   blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y);
 
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+   if (idct->blocks_x > 1 || idct->blocks_y > 1) {
+      struct ureg_dst ar = ureg_DECL_address(shader);
 
-   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
+      ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
+               ureg_scalar(vblock, TGSI_SWIZZLE_Y), blocks_xy, vblock);
 
-   if(matrix_stage) {
-      calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
-      calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
+      ureg_ARL(shader, ureg_writemask(ar, TGSI_WRITEMASK_X), ureg_src(t_tex));
+      t_eb = ureg_src_indirect(eb[0], ureg_src(ar));
    } else {
-      calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
-      calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
+      t_eb = eb[0];
    }
 
+   ureg_IF(shader, ureg_scalar(t_eb, color_swizzle), &label);
+
+      ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f));
+
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ELSE(shader, &label);
+
+      ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, blocks_xy, vblock);
+      ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
+
+      ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), vrect);
+
+      ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
+      ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
+         ureg_scalar(vrect, TGSI_SWIZZLE_X),
+         ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));
+
+      ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
+
+      if(matrix_stage) {
+         calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
+         calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
+      } else {
+         calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
+         calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
+      }
+
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ENDIF(shader);
+
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+
    ureg_release_temporary(shader, t_tex);
    ureg_release_temporary(shader, t_start);
 
@@ -326,12 +361,12 @@ create_transpose_frag_shader(struct vl_idct *idct)
 }
 
 static bool
-init_shaders(struct vl_idct *idct)
+init_shaders(struct vl_idct *idct, int color_swizzle)
 {
-   idct->matrix_vs = create_vert_shader(idct, true);
+   idct->matrix_vs = create_vert_shader(idct, true, color_swizzle);
    idct->matrix_fs = create_matrix_frag_shader(idct);
 
-   idct->transpose_vs = create_vert_shader(idct, false);
+   idct->transpose_vs = create_vert_shader(idct, false, color_swizzle);
    idct->transpose_fs = create_transpose_frag_shader(idct);
 
    return
@@ -353,14 +388,13 @@ cleanup_shaders(struct vl_idct *idct)
 static bool
 init_state(struct vl_idct *idct)
 {
-   struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
    struct pipe_sampler_state sampler;
    struct pipe_rasterizer_state rs_state;
    unsigned i;
 
    assert(idct);
 
-   idct->quad = vl_vb_upload_quads(idct->pipe, idct->max_blocks);
+   idct->quad = vl_vb_upload_quads(idct->pipe, idct->blocks_x, idct->blocks_y);
 
    if(idct->quad.buffer == NULL)
       return false;
@@ -393,13 +427,7 @@ init_state(struct vl_idct *idct)
    rs_state.gl_rasterization_rules = false;
    idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
 
-   vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
-
-   /* Pos element */
-   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
-
-   idct->vertex_buffer_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1);
-   idct->vertex_elems_state = idct->pipe->create_vertex_elements_state(idct->pipe, 2, vertex_elems);
+   idct->vertex_elems_state = vl_vb_get_elems_state(idct->pipe, false);
 
    return true;
 }
@@ -473,7 +501,7 @@ cleanup_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 }
 
 static bool
-init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_vertex_buffer stream)
 {
    assert(idct && buffer);
 
@@ -481,12 +509,9 @@ init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    buffer->vertex_bufs.individual.quad.buffer_offset = idct->quad.buffer_offset;
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, idct->quad.buffer);
 
-   buffer->vertex_bufs.individual.pos = vl_vb_init(
-      &buffer->blocks, idct->pipe, idct->max_blocks,
-      idct->vertex_buffer_stride);
-
-   if(buffer->vertex_bufs.individual.pos.buffer == NULL)
-      return false;
+   buffer->vertex_bufs.individual.stream.stride = stream.stride;
+   buffer->vertex_bufs.individual.stream.buffer_offset = stream.buffer_offset;
+   pipe_resource_reference(&buffer->vertex_bufs.individual.stream.buffer, stream.buffer);
 
    return true;
 }
@@ -497,9 +522,7 @@ cleanup_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    assert(idct && buffer);
 
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
-   pipe_resource_reference(&buffer->vertex_bufs.individual.pos.buffer, NULL);
-
-   vl_vb_cleanup(&buffer->blocks);
+   pipe_resource_reference(&buffer->vertex_bufs.individual.stream.buffer, NULL);
 }
 
 struct pipe_resource *
@@ -555,20 +578,19 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
 
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
-                  struct pipe_resource *matrix)
+                  unsigned blocks_x, unsigned blocks_y,
+                  int color_swizzle, struct pipe_resource *matrix)
 {
    assert(idct && pipe && matrix);
 
    idct->pipe = pipe;
    idct->buffer_width = buffer_width;
    idct->buffer_height = buffer_height;
+   idct->blocks_x = blocks_x;
+   idct->blocks_y = blocks_y;
    pipe_resource_reference(&idct->matrix, matrix);
 
-   idct->max_blocks =
-      align(buffer_width, BLOCK_WIDTH) / BLOCK_WIDTH *
-      align(buffer_height, BLOCK_HEIGHT) / BLOCK_HEIGHT;
-
-   if(!init_shaders(idct))
+   if(!init_shaders(idct, color_swizzle))
       return false;
 
    if(!init_state(idct)) {
@@ -589,7 +611,8 @@ vl_idct_cleanup(struct vl_idct *idct)
 }
 
 bool
-vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst)
+vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
+                    struct pipe_resource *dst, struct pipe_vertex_buffer stream)
 {
    struct pipe_surface template;
 
@@ -606,7 +629,7 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct
    if (!init_textures(idct, buffer))
       return false;
 
-   if (!init_vertex_buffers(idct, buffer))
+   if (!init_vertex_buffers(idct, buffer, stream))
       return false;
 
    /* init state */
@@ -694,14 +717,12 @@ vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    );
 
    buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer);
-
-   vl_vb_map(&buffer->blocks, idct->pipe);
 }
 
 void
 vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block)
 {
-   struct vertex2s v;
+   //struct vertex2s v;
    unsigned tex_pitch;
    short *texels;
 
@@ -714,10 +735,6 @@ vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *
 
    for (i = 0; i < BLOCK_HEIGHT; ++i)
       memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
-
-   v.x = x;
-   v.y = y;
-   vl_vb_add_block(&buffer->blocks, &v);
 }
 
 void
@@ -727,19 +744,18 @@ vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    idct->pipe->transfer_unmap(idct->pipe, buffer->tex_transfer);
    idct->pipe->transfer_destroy(idct->pipe, buffer->tex_transfer);
-   vl_vb_unmap(&buffer->blocks, idct->pipe);
 }
 
 void
-vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
 {
    unsigned num_verts;
 
    assert(idct);
+   assert(buffer);
 
-   num_verts = vl_vb_restart(&buffer->blocks);
-
-   if(num_verts > 0) {
+   if(num_instances > 0) {
+      num_verts = idct->blocks_x * idct->blocks_y * 4;
 
       idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
       idct->pipe->set_vertex_buffers(idct->pipe, 2, buffer->vertex_bufs.all);
@@ -752,7 +768,7 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer)
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
       idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
-      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_verts);
+      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
 
       /* second stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]);
@@ -761,6 +777,6 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer)
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
       idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
-      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_verts);
+      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
    }
 }
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index fcba75a7607..78e4a46369d 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -37,8 +37,7 @@ struct vl_idct
 
    unsigned buffer_width;
    unsigned buffer_height;
-
-   unsigned max_blocks;
+   unsigned blocks_x, blocks_y;
 
    void *rs_state;
    void *vertex_elems_state;
@@ -58,8 +57,6 @@ struct vl_idct
 
    struct pipe_resource *matrix;
    struct pipe_vertex_buffer quad;
-
-   unsigned vertex_buffer_stride;
 };
 
 struct vl_idct_buffer
@@ -92,24 +89,24 @@ struct vl_idct_buffer
    union
    {
       struct pipe_vertex_buffer all[2];
-      struct { struct pipe_vertex_buffer quad, pos; } individual;
+      struct { struct pipe_vertex_buffer quad, stream; } individual;
    } vertex_bufs;
 
-   struct vl_vertex_buffer blocks;
-
    struct pipe_transfer *tex_transfer;
    short *texels;
 };
 
 struct pipe_resource *vl_idct_upload_matrix(struct pipe_context *pipe);
 
-bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, 
-                  unsigned buffer_width, unsigned buffer_height, 
-                  struct pipe_resource *matrix);
+bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
+                  unsigned buffer_width, unsigned buffer_height,
+                  unsigned blocks_x, unsigned blocks_y,
+                  int color_swizzle, struct pipe_resource *matrix);
 
 void vl_idct_cleanup(struct vl_idct *idct);
 
-bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_resource *dst);
+bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
+                         struct pipe_resource *dst, struct pipe_vertex_buffer stream);
 
 void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
@@ -119,6 +116,6 @@ void vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, sh
 
 void vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
-void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer);
+void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances);
 
 #endif
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 2f68ec17042..d1b2144aea5 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -38,40 +38,11 @@
 #include <util/u_draw.h>
 #include <tgsi/tgsi_ureg.h>
 
-#define DEFAULT_BUF_ALIGNMENT 1
 #define MACROBLOCK_WIDTH 16
 #define MACROBLOCK_HEIGHT 16
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
 
-struct vertex_stream
-{
-   struct vertex2s pos;
-   struct vertex2s mv[4];
-   struct {
-      int8_t y;
-      int8_t cr;
-      int8_t cb;
-      int8_t flag;
-   } eb[2][2];
-};
-
-enum VS_INPUT
-{
-   VS_I_RECT,
-   VS_I_VPOS,
-   VS_I_MV0,
-   VS_I_MV1,
-   VS_I_MV2,
-   VS_I_MV3,
-   VS_I_EB_0_0,
-   VS_I_EB_0_1,
-   VS_I_EB_1_0,
-   VS_I_EB_1_1,
-
-   NUM_VS_INPUTS
-};
-
 enum VS_OUTPUT
 {
    VS_O_VPOS,
@@ -514,14 +485,13 @@ static bool
 init_buffers(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_resource *idct_matrix;
-   struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
 
    const unsigned mbw =
       align(r->buffer_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
    const unsigned mbh =
       align(r->buffer_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
 
-   unsigned i, chroma_width, chroma_height;
+   unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
 
    assert(r);
 
@@ -531,51 +501,37 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    if (!(idct_matrix = vl_idct_upload_matrix(r->pipe)))
       return false;
 
-   if (!vl_idct_init(&r->idct_luma, r->pipe, r->buffer_width, r->buffer_height, idct_matrix))
+   if (!vl_idct_init(&r->idct_y, r->pipe, r->buffer_width, r->buffer_height,
+                     2, 2, TGSI_SWIZZLE_X, idct_matrix))
       return false;
 
    if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
       chroma_width = r->buffer_width / 2;
       chroma_height = r->buffer_height / 2;
+      chroma_blocks_x = 1;
+      chroma_blocks_y = 1;
    } else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
       chroma_width = r->buffer_width;
       chroma_height = r->buffer_height / 2;
+      chroma_blocks_x = 2;
+      chroma_blocks_y = 1;
    } else {
       chroma_width = r->buffer_width;
       chroma_height = r->buffer_height;
+      chroma_blocks_x = 2;
+      chroma_blocks_y = 2;
    }
 
-   if(!vl_idct_init(&r->idct_chroma, r->pipe, chroma_width, chroma_height, idct_matrix))
+   if(!vl_idct_init(&r->idct_cr, r->pipe, chroma_width, chroma_height,
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
       return false;
 
-   memset(&vertex_elems, 0, sizeof(vertex_elems));
-
-   vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
-   r->quad = vl_vb_upload_quads(r->pipe, r->macroblocks_per_batch);
-
-   /* Position element */
-   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
-
-   for (i = 0; i < 4; ++i)
-      /* motion vector 0..4 element */
-      vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R16G16_SSCALED;
-
-   /* y, cr, cb empty block element top left block */
-   vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
-
-   /* y, cr, cb empty block element top right block */
-   vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
-
-   /* y, cr, cb empty block element bottom left block */
-   vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
-
-   /* y, cr, cb empty block element bottom right block */
-   vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
-
-   r->vertex_stream_stride = vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 9, 1);
+   if(!vl_idct_init(&r->idct_cb, r->pipe, chroma_width, chroma_height,
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
+      return false;
 
-   r->vertex_elems_state = r->pipe->create_vertex_elements_state(
-      r->pipe, NUM_VS_INPUTS, vertex_elems);
+   r->quad = vl_vb_upload_quads(r->pipe, 1, 1);
+   r->vertex_elems_state = vl_vb_get_elems_state(r->pipe, true);
 
    if (r->vertex_elems_state == NULL)
       return false;
@@ -597,8 +553,9 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
    r->pipe->delete_vs_state(r->pipe, r->vs);
    r->pipe->delete_fs_state(r->pipe, r->fs);
 
-   vl_idct_cleanup(&r->idct_luma);
-   vl_idct_cleanup(&r->idct_chroma);
+   vl_idct_cleanup(&r->idct_y);
+   vl_idct_cleanup(&r->idct_cr);
+   vl_idct_cleanup(&r->idct_cb);
 
    r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state);
 }
@@ -630,118 +587,6 @@ static struct pipe_sampler_view
    return sampler_view;
 }
 
-static void
-get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
-{
-   switch (mb->mb_type) {
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-      {
-         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            mv[2].x = mb->pmv[0][1][0];
-            mv[2].y = mb->pmv[0][1][1];
-
-         } else {
-            mv[2].x = mb->pmv[0][1][0];
-            mv[2].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
-
-            mv[3].x = mb->pmv[1][1][0];
-            mv[3].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
-
-            if(mb->mvfs[0][1]) mv[2].y += 2;
-            if(!mb->mvfs[1][1]) mv[3].y -= 2;
-         }
-
-         /* fall-through */
-      }
-      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-      {
-         if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
-
-            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-               mv[0].x = mb->pmv[0][1][0];
-               mv[0].y = mb->pmv[0][1][1];
-
-            } else {
-               mv[0].x = mb->pmv[0][1][0];
-               mv[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
-
-               mv[1].x = mb->pmv[1][1][0];
-               mv[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
-
-               if(mb->mvfs[0][1]) mv[0].y += 2;
-               if(!mb->mvfs[1][1]) mv[1].y -= 2;
-            }
-
-         } else {
-
-            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-               mv[0].x = mb->pmv[0][0][0];
-               mv[0].y = mb->pmv[0][0][1];
-
-            } else {
-               mv[0].x = mb->pmv[0][0][0];
-               mv[0].y = mb->pmv[0][0][1] - (mb->pmv[0][0][1] % 4);
-
-               mv[1].x = mb->pmv[1][0][0];
-               mv[1].y = mb->pmv[1][0][1] - (mb->pmv[1][0][1] % 4);
-
-               if(mb->mvfs[0][0]) mv[0].y += 2;
-               if(!mb->mvfs[1][0]) mv[1].y -= 2;
-            }
-         }
-      }
-      default:
-         break;
-   }
-}
-
-static void
-grab_vectors(struct vl_mpeg12_mc_renderer *r,
-             struct vl_mpeg12_mc_buffer *buffer,
-             struct pipe_mpeg12_macroblock *mb)
-{
-   struct vertex_stream stream;
-
-   unsigned i, j;
-
-   assert(r);
-   assert(mb);
-
-   stream.pos.x = mb->mbx;
-   stream.pos.y = mb->mby;
-   for ( i = 0; i < 2; ++i) {
-      for ( j = 0; j < 2; ++j) {
-         stream.eb[i][j].y = !(mb->cbp & (*r->empty_block_mask)[0][i][j]);
-         stream.eb[i][j].cr = !(mb->cbp & (*r->empty_block_mask)[1][i][j]);
-         stream.eb[i][j].cb = !(mb->cbp & (*r->empty_block_mask)[2][i][j]);
-      }
-   }
-   stream.eb[0][0].flag = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
-   stream.eb[0][1].flag = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
-   stream.eb[1][0].flag = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD;
-   switch (mb->mb_type) {
-      case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
-         stream.eb[1][1].flag = -1;
-         break;
-
-      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-         stream.eb[1][1].flag = 1;
-         break;
-
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-         stream.eb[1][1].flag = 0;
-         break;
-
-      default:
-         assert(0);
-   }
-
-   get_motion_vectors(mb, stream.mv);
-   vl_vb_add_block(&buffer->vertex_stream, &stream);
-}
-
 static void
 grab_blocks(struct vl_mpeg12_mc_renderer *r,
             struct vl_mpeg12_mc_buffer *buffer,
@@ -785,12 +630,9 @@ grab_macroblock(struct vl_mpeg12_mc_renderer *r,
    assert(r);
    assert(mb);
    assert(mb->blocks);
-   assert(buffer->num_macroblocks < r->macroblocks_per_batch);
 
-   grab_vectors(r, buffer, mb);
+   vl_vb_add_block(&buffer->vertex_stream, mb, r->empty_block_mask);
    grab_blocks(r, buffer, mb->mbx, mb->mby, mb->cbp, mb->blocks);
-
-   ++buffer->num_macroblocks;
 }
 
 static void
@@ -878,7 +720,13 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    buffer->surface = NULL;
    buffer->past = NULL;
    buffer->future = NULL;
-   buffer->num_macroblocks = 0;
+
+   buffer->vertex_bufs.individual.quad.stride = renderer->quad.stride;
+   buffer->vertex_bufs.individual.quad.buffer_offset = renderer->quad.buffer_offset;
+   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer);
+
+   buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, renderer->pipe,
+                                                      renderer->macroblocks_per_batch);
 
    memset(&template, 0, sizeof(struct pipe_resource));
    template.target = PIPE_TEXTURE_2D;
@@ -895,7 +743,9 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
 
    buffer->textures.individual.y = renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
 
-   if (!vl_idct_init_buffer(&renderer->idct_luma, &buffer->idct_y, buffer->textures.individual.y))
+   if (!vl_idct_init_buffer(&renderer->idct_y, &buffer->idct_y,
+                            buffer->textures.individual.y,
+                            buffer->vertex_bufs.individual.stream))
       return false;
 
    if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
@@ -910,10 +760,14 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    buffer->textures.individual.cr =
       renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
 
-   if (!vl_idct_init_buffer(&renderer->idct_chroma, &buffer->idct_cb, buffer->textures.individual.cb))
+   if (!vl_idct_init_buffer(&renderer->idct_cb, &buffer->idct_cb,
+                            buffer->textures.individual.cb,
+                            buffer->vertex_bufs.individual.stream))
       return false;
 
-   if (!vl_idct_init_buffer(&renderer->idct_chroma, &buffer->idct_cr, buffer->textures.individual.cr))
+   if (!vl_idct_init_buffer(&renderer->idct_cr, &buffer->idct_cr,
+                            buffer->textures.individual.cr,
+                            buffer->vertex_bufs.individual.stream))
       return false;
 
    for (i = 0; i < 3; ++i) {
@@ -928,14 +782,6 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
          renderer->pipe, buffer->textures.all[i], &sampler_view);
    }
 
-   buffer->vertex_bufs.individual.quad.stride = renderer->quad.stride;
-   buffer->vertex_bufs.individual.quad.buffer_offset = renderer->quad.buffer_offset;
-   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer);
-
-   buffer->vertex_bufs.individual.stream = vl_vb_init(
-      &buffer->vertex_stream, renderer->pipe, renderer->macroblocks_per_batch,
-      renderer->vertex_stream_stride);
-
    return true;
 }
 
@@ -955,9 +801,9 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
    vl_vb_cleanup(&buffer->vertex_stream);
 
-   vl_idct_cleanup_buffer(&renderer->idct_luma, &buffer->idct_y);
-   vl_idct_cleanup_buffer(&renderer->idct_chroma, &buffer->idct_cb);
-   vl_idct_cleanup_buffer(&renderer->idct_chroma, &buffer->idct_cr);
+   vl_idct_cleanup_buffer(&renderer->idct_y, &buffer->idct_y);
+   vl_idct_cleanup_buffer(&renderer->idct_cb, &buffer->idct_cb);
+   vl_idct_cleanup_buffer(&renderer->idct_cr, &buffer->idct_cr);
 
    pipe_surface_reference(&buffer->surface, NULL);
    pipe_surface_reference(&buffer->past, NULL);
@@ -969,9 +815,9 @@ vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12
 {
    assert(renderer && buffer);
 
-   vl_idct_map_buffers(&renderer->idct_luma, &buffer->idct_y);
-   vl_idct_map_buffers(&renderer->idct_chroma, &buffer->idct_cr);
-   vl_idct_map_buffers(&renderer->idct_chroma, &buffer->idct_cb);
+   vl_idct_map_buffers(&renderer->idct_y, &buffer->idct_y);
+   vl_idct_map_buffers(&renderer->idct_cr, &buffer->idct_cr);
+   vl_idct_map_buffers(&renderer->idct_cb, &buffer->idct_cb);
 
    vl_vb_map(&buffer->vertex_stream, renderer->pipe);
 }
@@ -986,6 +832,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
                                          struct pipe_mpeg12_macroblock *mpeg12_macroblocks,
                                          struct pipe_fence_handle **fence)
 {
+   unsigned i;
+
    assert(renderer && buffer);
    assert(surface);
    assert(num_macroblocks);
@@ -1002,26 +850,9 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
       assert(buffer->future == future);
    }
 
-   while (num_macroblocks) {
-      unsigned left_in_batch = renderer->macroblocks_per_batch - buffer->num_macroblocks;
-      unsigned num_to_submit = MIN2(num_macroblocks, left_in_batch);
-      unsigned i;
-
-      for (i = 0; i < num_to_submit; ++i) {
-         assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
-         grab_macroblock(renderer, buffer, &mpeg12_macroblocks[i]);
-      }
-
-      num_macroblocks -= num_to_submit;
-
-      if (buffer->num_macroblocks == renderer->macroblocks_per_batch) {
-         vl_mpeg12_mc_unmap_buffer(renderer, buffer);
-         vl_mpeg12_mc_renderer_flush(renderer, buffer);
-         pipe_surface_reference(&buffer->surface, surface);
-         pipe_surface_reference(&buffer->past, past);
-         pipe_surface_reference(&buffer->future, future);
-         vl_mpeg12_mc_map_buffer(renderer, buffer);
-      }
+   for (i = 0; i < num_macroblocks; ++i) {
+      assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
+      grab_macroblock(renderer, buffer, &mpeg12_macroblocks[i]);
    }
 }
 
@@ -1030,9 +861,9 @@ vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg
 {
    assert(renderer && buffer);
 
-   vl_idct_unmap_buffers(&renderer->idct_luma, &buffer->idct_y);
-   vl_idct_unmap_buffers(&renderer->idct_chroma, &buffer->idct_cr);
-   vl_idct_unmap_buffers(&renderer->idct_chroma, &buffer->idct_cb);
+   vl_idct_unmap_buffers(&renderer->idct_y, &buffer->idct_y);
+   vl_idct_unmap_buffers(&renderer->idct_cr, &buffer->idct_cr);
+   vl_idct_unmap_buffers(&renderer->idct_cb, &buffer->idct_cb);
 
    vl_vb_unmap(&buffer->vertex_stream, renderer->pipe);
 }
@@ -1040,17 +871,18 @@ vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg
 void
 vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
 {
+   unsigned num_not_empty, num_empty;
    assert(renderer && buffer);
-   assert(buffer->num_macroblocks <= renderer->macroblocks_per_batch);
 
-   if (buffer->num_macroblocks == 0)
+   num_not_empty = buffer->vertex_stream.num_not_empty;
+   num_empty = buffer->vertex_stream.num_empty;
+
+   if (num_not_empty == 0 && num_empty == 0)
       return;
 
-   vl_idct_flush(&renderer->idct_luma, &buffer->idct_y);
-   vl_idct_flush(&renderer->idct_chroma, &buffer->idct_cr);
-   vl_idct_flush(&renderer->idct_chroma, &buffer->idct_cb);
-
-   vl_vb_restart(&buffer->vertex_stream);
+   vl_idct_flush(&renderer->idct_y, &buffer->idct_y, num_not_empty);
+   vl_idct_flush(&renderer->idct_cr, &buffer->idct_cr, num_not_empty);
+   vl_idct_flush(&renderer->idct_cb, &buffer->idct_cb, num_not_empty);
 
    renderer->fb_state.cbufs[0] = buffer->surface;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
@@ -1060,18 +892,14 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    renderer->pipe->bind_vertex_elements_state(renderer->pipe, renderer->vertex_elems_state);
 
    if (buffer->past) {
-      buffer->textures.individual.ref[0] = buffer->past->texture;
       buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->past);
    } else {
-      buffer->textures.individual.ref[0] = buffer->surface->texture;
       buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->surface);
    }
 
    if (buffer->future) {
-      buffer->textures.individual.ref[1] = buffer->future->texture;
       buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->future);
    } else {
-      buffer->textures.individual.ref[1] = buffer->surface->texture;
       buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->surface);
    }
 
@@ -1080,7 +908,13 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
 
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs);
-   util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, buffer->num_macroblocks);
+
+   if (num_not_empty > 0)
+      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_not_empty);
+
+   if (num_empty > 0)
+      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
+                                 buffer->vertex_stream.size - num_empty, num_empty);
 
    renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, buffer->fence);
 
@@ -1089,5 +923,5 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    pipe_surface_reference(&buffer->past, NULL);
    pipe_surface_reference(&buffer->future, NULL);
 
-   buffer->num_macroblocks = 0;
+   vl_vb_restart(&buffer->vertex_stream);
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 76d6e25ca36..c319064c70f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -56,12 +56,10 @@ struct vl_mpeg12_mc_renderer
    enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode;
    unsigned macroblocks_per_batch;
 
-   unsigned vertex_stream_stride;
-
    struct pipe_viewport_state viewport;
    struct pipe_framebuffer_state fb_state;
 
-   struct vl_idct idct_luma, idct_chroma;
+   struct vl_idct idct_y, idct_cr, idct_cb;
 
    void *vertex_elems_state;
    void *rs_state;
@@ -93,8 +91,8 @@ struct vl_mpeg12_mc_buffer
 
    union
    {
-      struct pipe_resource *all[5];
-      struct { struct pipe_resource *y, *cb, *cr, *ref[2]; } individual;
+      struct pipe_resource *all[3];
+      struct { struct pipe_resource *y, *cb, *cr; } individual;
    } textures;
 
    union
@@ -107,7 +105,6 @@ struct vl_mpeg12_mc_buffer
 
    struct pipe_surface *surface, *past, *future;
    struct pipe_fence_handle **fence;
-   unsigned num_macroblocks;
 };
 
 bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
diff --git a/src/gallium/auxiliary/vl/vl_types.h b/src/gallium/auxiliary/vl/vl_types.h
index 9c745d73978..a927e829349 100644
--- a/src/gallium/auxiliary/vl/vl_types.h
+++ b/src/gallium/auxiliary/vl/vl_types.h
@@ -43,9 +43,4 @@ struct vertex4f
    float x, y, z, w;
 };
 
-struct quadf
-{
-   struct vertex2f bl, tl, tr, br;
-};
-
 #endif /* vl_types_h */
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 610f37e1f89..2d602b96d3f 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -34,32 +34,43 @@
 #include "vl_vertex_buffers.h"
 #include "vl_types.h"
 
+struct vl_vertex_stream
+{
+   struct vertex2s pos;
+   struct {
+      int8_t y;
+      int8_t cr;
+      int8_t cb;
+      int8_t flag;
+   } eb[2][2];
+   struct vertex2s mv[4];
+};
+
 /* vertices for a quad covering a block */
-static const struct quadf const_quad = {
-   {0.0f, 1.0f}, {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}
+static const struct vertex2f block_quad[4] = {
+   {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
 };
 
 struct pipe_vertex_buffer
-vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
+vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks_y)
 {
    struct pipe_vertex_buffer quad;
    struct pipe_transfer *buf_transfer;
-   struct quadf *v;
+   struct vertex4f *v;
 
-   unsigned i;
+   unsigned x, y, i;
 
    assert(pipe);
-   assert(max_blocks);
 
    /* create buffer */
-   quad.stride = sizeof(struct vertex2f);
+   quad.stride = sizeof(struct vertex4f);
    quad.buffer_offset = 0;
    quad.buffer = pipe_buffer_create
    (
       pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
       PIPE_USAGE_STATIC,
-      sizeof(struct vertex2f) * 4 * max_blocks
+      sizeof(struct vertex4f) * 4 * blocks_x * blocks_y
    );
 
    if(!quad.buffer)
@@ -74,15 +85,24 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks)
       &buf_transfer
    );
 
-   for ( i = 0; i < max_blocks; ++i)
-     memcpy(v + i, &const_quad, sizeof(const_quad));
+   for ( y = 0; y < blocks_y; ++y) {
+      for ( x = 0; x < blocks_x; ++x) {
+         for (i = 0; i < 4; ++i, ++v) {
+            v->x = block_quad[i].x;
+            v->y = block_quad[i].y;
+
+            v->z = x;
+            v->w = y;
+         }
+      }
+   }
 
    pipe_buffer_unmap(pipe, buf_transfer);
 
    return quad;
 }
 
-struct pipe_vertex_element
+static struct pipe_vertex_element
 vl_vb_get_quad_vertex_element(void)
 {
    struct pipe_vertex_element element;
@@ -91,12 +111,12 @@ vl_vb_get_quad_vertex_element(void)
    element.src_offset = 0;
    element.instance_divisor = 0;
    element.vertex_buffer_index = 0;
-   element.src_format = PIPE_FORMAT_R32G32_FLOAT;
+   element.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
 
    return element;
 }
 
-unsigned
+static void
 vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
                               unsigned vertex_buffer_index)
 {
@@ -110,29 +130,61 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
       elements[i].vertex_buffer_index = vertex_buffer_index;
       offset += util_format_get_blocksize(elements[i].src_format);
    }
+}
+
+void *
+vl_vb_get_elems_state(struct pipe_context *pipe, bool include_mvs)
+{
+   struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
+
+   unsigned i;
+
+   memset(&vertex_elems, 0, sizeof(vertex_elems));
+   vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
+
+   /* Position element */
+   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
+
+   /* y, cr, cb empty block element top left block */
+   vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+
+   /* y, cr, cb empty block element top right block */
+   vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+
+   /* y, cr, cb empty block element bottom left block */
+   vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+
+   /* y, cr, cb empty block element bottom right block */
+   vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
 
-   return offset;
+   for (i = 0; i < 4; ++i)
+      /* motion vector 0..4 element */
+      vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R16G16_SSCALED;
+
+   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - (include_mvs ? 1 : 5), 1);
+
+   return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS - (include_mvs ? 0 : 4), vertex_elems);
 }
 
 struct pipe_vertex_buffer
-vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
-           unsigned max_blocks, unsigned stride)
+vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned size)
 {
    struct pipe_vertex_buffer buf;
 
    assert(buffer);
 
-   buffer->num_verts = 0;
-   buffer->stride = stride;
+   buffer->size = size;
+   buffer->num_not_empty = 0;
+   buffer->num_empty = 0;
 
-   buf.stride = stride;
+   buf.stride = sizeof(struct vl_vertex_stream);
    buf.buffer_offset = 0;
    buf.buffer = pipe_buffer_create
    (
       pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
       PIPE_USAGE_STREAM,
-      stride * 4 * max_blocks
+      sizeof(struct vl_vertex_stream) * size
    );
 
    pipe_resource_reference(&buffer->resource, buf.buffer);
@@ -147,13 +199,129 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 {
    assert(buffer && pipe);
 
-   buffer->vectors = pipe_buffer_map
+   buffer->start = pipe_buffer_map
    (
       pipe,
       buffer->resource,
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &buffer->transfer
    );
+   buffer->end = buffer->start + buffer->resource->width0 / sizeof(struct vl_vertex_stream);
+}
+
+static void
+get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
+{
+   switch (mb->mb_type) {
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
+      {
+         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+            mv[2].x = mb->pmv[0][1][0];
+            mv[2].y = mb->pmv[0][1][1];
+
+         } else {
+            mv[2].x = mb->pmv[0][1][0];
+            mv[2].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
+
+            mv[3].x = mb->pmv[1][1][0];
+            mv[3].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
+
+            if(mb->mvfs[0][1]) mv[2].y += 2;
+            if(!mb->mvfs[1][1]) mv[3].y -= 2;
+         }
+
+         /* fall-through */
+      }
+      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
+      {
+         if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
+
+            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+               mv[0].x = mb->pmv[0][1][0];
+               mv[0].y = mb->pmv[0][1][1];
+
+            } else {
+               mv[0].x = mb->pmv[0][1][0];
+               mv[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
+
+               mv[1].x = mb->pmv[1][1][0];
+               mv[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
+
+               if(mb->mvfs[0][1]) mv[0].y += 2;
+               if(!mb->mvfs[1][1]) mv[1].y -= 2;
+            }
+
+         } else {
+
+            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+               mv[0].x = mb->pmv[0][0][0];
+               mv[0].y = mb->pmv[0][0][1];
+
+            } else {
+               mv[0].x = mb->pmv[0][0][0];
+               mv[0].y = mb->pmv[0][0][1] - (mb->pmv[0][0][1] % 4);
+
+               mv[1].x = mb->pmv[1][0][0];
+               mv[1].y = mb->pmv[1][0][1] - (mb->pmv[1][0][1] % 4);
+
+               if(mb->mvfs[0][0]) mv[0].y += 2;
+               if(!mb->mvfs[1][0]) mv[1].y -= 2;
+            }
+         }
+      }
+      default:
+         break;
+   }
+}
+
+void
+vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb,
+                const unsigned (*empty_block_mask)[3][2][2])
+{
+   struct vl_vertex_stream *stream;
+   unsigned i, j;
+
+   assert(buffer);
+   assert(mb);
+
+   if(mb->cbp)
+      stream = buffer->start + buffer->num_not_empty++;
+   else
+      stream = buffer->end - ++buffer->num_empty;
+
+   stream->pos.x = mb->mbx;
+   stream->pos.y = mb->mby;
+
+   for ( i = 0; i < 2; ++i) {
+      for ( j = 0; j < 2; ++j) {
+         stream->eb[i][j].y = !(mb->cbp & (*empty_block_mask)[0][i][j]);
+         stream->eb[i][j].cr = !(mb->cbp & (*empty_block_mask)[1][i][j]);
+         stream->eb[i][j].cb = !(mb->cbp & (*empty_block_mask)[2][i][j]);
+      }
+   }
+   stream->eb[0][0].flag = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
+   stream->eb[0][1].flag = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
+   stream->eb[1][0].flag = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD;
+   switch (mb->mb_type) {
+      case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
+         stream->eb[1][1].flag = -1;
+         break;
+
+      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
+         stream->eb[1][1].flag = 1;
+         break;
+
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
+         stream->eb[1][1].flag = 0;
+         break;
+
+      default:
+         assert(0);
+   }
+
+   get_motion_vectors(mb, stream->mv);
 }
 
 void
@@ -164,14 +332,13 @@ vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
    pipe_buffer_unmap(pipe, buffer->transfer);
 }
 
-unsigned
+void
 vl_vb_restart(struct vl_vertex_buffer *buffer)
 {
    assert(buffer);
 
-   unsigned todo = buffer->num_verts;
-   buffer->num_verts = 0;
-   return todo;
+   buffer->num_not_empty = 0;
+   buffer->num_empty = 0;
 }
 
 void
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index ca06abe2027..4400bda6274 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -29,45 +29,53 @@
 
 #include <assert.h>
 #include <pipe/p_state.h>
+#include <pipe/p_video_state.h>
 #include "vl_types.h"
 
+enum VS_INPUT
+{
+   VS_I_RECT,
+   VS_I_VPOS,
+   VS_I_EB_0_0,
+   VS_I_EB_0_1,
+   VS_I_EB_1_0,
+   VS_I_EB_1_1,
+   VS_I_MV0,
+   VS_I_MV1,
+   VS_I_MV2,
+   VS_I_MV3,
+
+   NUM_VS_INPUTS
+};
+
 struct vl_vertex_buffer
 {
-   unsigned num_verts;
-   unsigned stride;
+   unsigned size;
+   unsigned num_not_empty;
+   unsigned num_empty;
    struct pipe_resource *resource;
    struct pipe_transfer *transfer;
-   void *vectors;
+   struct vl_vertex_stream *start;
+   struct vl_vertex_stream *end;
 };
 
-struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe, unsigned max_blocks);
+struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe,
+                                             unsigned blocks_x, unsigned blocks_y);
 
-struct pipe_vertex_element vl_vb_get_quad_vertex_element(void);
-
-unsigned vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
-                              unsigned vertex_buffer_index);
+void *vl_vb_get_elems_state(struct pipe_context *pipe, bool include_mvs);
 
 struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer,
                                      struct pipe_context *pipe,
-                                     unsigned max_blocks, unsigned stride);
+                                     unsigned max_blocks);
 
 void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
-static inline void
-vl_vb_add_block(struct vl_vertex_buffer *buffer, void *elements)
-{
-   void *pos;
-
-   assert(buffer);
-
-   pos = buffer->vectors + buffer->num_verts * buffer->stride;
-   memcpy(pos, elements, buffer->stride);
-   buffer->num_verts++;
-}
+void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb,
+                     const unsigned (*empty_block_mask)[3][2][2]);
 
 void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
-unsigned vl_vb_restart(struct vl_vertex_buffer *buffer);
+void vl_vb_restart(struct vl_vertex_buffer *buffer);
 
 void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
 
-- 
cgit v1.2.3


From 37a548c9d1db6bbf8712277f678d850f34d0e445 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 8 Mar 2011 21:30:33 +0100
Subject: [g3dvl] start to cleanup the mess

Move the vertex buffer out of the mc code
---
 src/gallium/auxiliary/vl/vl_idct.c               | 41 +---------
 src/gallium/auxiliary/vl/vl_idct.h               | 10 +--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 99 +++++-------------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h | 29 +------
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     |  9 ++-
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     |  4 +-
 src/gallium/drivers/r600/r600_video_context.c    |  1 -
 src/gallium/drivers/softpipe/sp_video_context.c  | 77 ++++++++++++------
 src/gallium/drivers/softpipe/sp_video_context.h  | 24 +++++-
 9 files changed, 112 insertions(+), 182 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index b418aea9514..f191a898edd 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -394,11 +394,6 @@ init_state(struct vl_idct *idct)
 
    assert(idct);
 
-   idct->quad = vl_vb_upload_quads(idct->pipe, idct->blocks_x, idct->blocks_y);
-
-   if(idct->quad.buffer == NULL)
-      return false;
-
    for (i = 0; i < 4; ++i) {
       memset(&sampler, 0, sizeof(sampler));
       sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
@@ -427,8 +422,6 @@ init_state(struct vl_idct *idct)
    rs_state.gl_rasterization_rules = false;
    idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
 
-   idct->vertex_elems_state = vl_vb_get_elems_state(idct->pipe, false);
-
    return true;
 }
 
@@ -441,7 +434,6 @@ cleanup_state(struct vl_idct *idct)
       idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
 
    idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
-   idct->pipe->delete_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
 }
 
 static bool
@@ -500,31 +492,6 @@ cleanup_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    }
 }
 
-static bool
-init_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer, struct pipe_vertex_buffer stream)
-{
-   assert(idct && buffer);
-
-   buffer->vertex_bufs.individual.quad.stride = idct->quad.stride;
-   buffer->vertex_bufs.individual.quad.buffer_offset = idct->quad.buffer_offset;
-   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, idct->quad.buffer);
-
-   buffer->vertex_bufs.individual.stream.stride = stream.stride;
-   buffer->vertex_bufs.individual.stream.buffer_offset = stream.buffer_offset;
-   pipe_resource_reference(&buffer->vertex_bufs.individual.stream.buffer, stream.buffer);
-
-   return true;
-}
-
-static void
-cleanup_vertex_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
-{
-   assert(idct && buffer);
-
-   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
-   pipe_resource_reference(&buffer->vertex_bufs.individual.stream.buffer, NULL);
-}
-
 struct pipe_resource *
 vl_idct_upload_matrix(struct pipe_context *pipe)
 {
@@ -612,7 +579,7 @@ vl_idct_cleanup(struct vl_idct *idct)
 
 bool
 vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
-                    struct pipe_resource *dst, struct pipe_vertex_buffer stream)
+                    struct pipe_resource *dst)
 {
    struct pipe_surface template;
 
@@ -629,9 +596,6 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
    if (!init_textures(idct, buffer))
       return false;
 
-   if (!init_vertex_buffers(idct, buffer, stream))
-      return false;
-
    /* init state */
    buffer->viewport[0].scale[0] = buffer->textures.individual.intermediate->width0;
    buffer->viewport[0].scale[1] = buffer->textures.individual.intermediate->height0;
@@ -693,7 +657,6 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[1].cbufs[0]);
 
    cleanup_textures(idct, buffer);
-   cleanup_vertex_buffers(idct, buffer);
 }
 
 void
@@ -758,8 +721,6 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
       num_verts = idct->blocks_x * idct->blocks_y * 4;
 
       idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
-      idct->pipe->set_vertex_buffers(idct->pipe, 2, buffer->vertex_bufs.all);
-      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
 
       /* first stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 78e4a46369d..0ff12cf466d 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -40,7 +40,6 @@ struct vl_idct
    unsigned blocks_x, blocks_y;
 
    void *rs_state;
-   void *vertex_elems_state;
 
    union
    {
@@ -56,7 +55,6 @@ struct vl_idct
    void *matrix_fs, *transpose_fs;
 
    struct pipe_resource *matrix;
-   struct pipe_vertex_buffer quad;
 };
 
 struct vl_idct_buffer
@@ -86,12 +84,6 @@ struct vl_idct_buffer
       } individual;
    } textures;
 
-   union
-   {
-      struct pipe_vertex_buffer all[2];
-      struct { struct pipe_vertex_buffer quad, stream; } individual;
-   } vertex_bufs;
-
    struct pipe_transfer *tex_transfer;
    short *texels;
 };
@@ -106,7 +98,7 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
 void vl_idct_cleanup(struct vl_idct *idct);
 
 bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
-                         struct pipe_resource *dst, struct pipe_vertex_buffer stream);
+                         struct pipe_resource *dst);
 
 void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index d1b2144aea5..160388ad049 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -485,19 +485,10 @@ static bool
 init_buffers(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_resource *idct_matrix;
-
-   const unsigned mbw =
-      align(r->buffer_width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH;
-   const unsigned mbh =
-      align(r->buffer_height, MACROBLOCK_HEIGHT) / MACROBLOCK_HEIGHT;
-
    unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
 
    assert(r);
 
-   r->macroblocks_per_batch =
-      mbw * (r->bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE ? mbh : 1);
-
    if (!(idct_matrix = vl_idct_upload_matrix(r->pipe)))
       return false;
 
@@ -530,12 +521,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
                     chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
       return false;
 
-   r->quad = vl_vb_upload_quads(r->pipe, 1, 1);
-   r->vertex_elems_state = vl_vb_get_elems_state(r->pipe, true);
-
-   if (r->vertex_elems_state == NULL)
-      return false;
-
    r->vs = create_vert_shader(r);
    r->fs = create_frag_shader(r);
 
@@ -556,8 +541,6 @@ cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
    vl_idct_cleanup(&r->idct_y);
    vl_idct_cleanup(&r->idct_cr);
    vl_idct_cleanup(&r->idct_cb);
-
-   r->pipe->delete_vertex_elements_state(r->pipe, r->vertex_elems_state);
 }
 
 static struct pipe_sampler_view
@@ -622,19 +605,6 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r,
    }
 }
 
-static void
-grab_macroblock(struct vl_mpeg12_mc_renderer *r,
-                struct vl_mpeg12_mc_buffer *buffer,
-                struct pipe_mpeg12_macroblock *mb)
-{
-   assert(r);
-   assert(mb);
-   assert(mb->blocks);
-
-   vl_vb_add_block(&buffer->vertex_stream, mb, r->empty_block_mask);
-   grab_blocks(r, buffer, mb->mbx, mb->mby, mb->cbp, mb->blocks);
-}
-
 static void
 texview_map_delete(const struct keymap *map,
                    const void *key, void *data,
@@ -655,22 +625,17 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            struct pipe_context *pipe,
                            unsigned buffer_width,
                            unsigned buffer_height,
-                           enum pipe_video_chroma_format chroma_format,
-                           enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode)
+                           enum pipe_video_chroma_format chroma_format)
 {
    assert(renderer);
    assert(pipe);
 
-   /* TODO: Implement other policies */
-   assert(bufmode == VL_MPEG12_MC_RENDERER_BUFFER_PICTURE);
-
    memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
 
    renderer->pipe = pipe;
    renderer->buffer_width = buffer_width;
    renderer->buffer_height = buffer_height;
    renderer->chroma_format = chroma_format;
-   renderer->bufmode = bufmode;
 
    /* TODO: Implement 422, 444 */
    assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
@@ -721,13 +686,6 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    buffer->past = NULL;
    buffer->future = NULL;
 
-   buffer->vertex_bufs.individual.quad.stride = renderer->quad.stride;
-   buffer->vertex_bufs.individual.quad.buffer_offset = renderer->quad.buffer_offset;
-   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, renderer->quad.buffer);
-
-   buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, renderer->pipe,
-                                                      renderer->macroblocks_per_batch);
-
    memset(&template, 0, sizeof(struct pipe_resource));
    template.target = PIPE_TEXTURE_2D;
    /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
@@ -743,9 +701,7 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
 
    buffer->textures.individual.y = renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
 
-   if (!vl_idct_init_buffer(&renderer->idct_y, &buffer->idct_y,
-                            buffer->textures.individual.y,
-                            buffer->vertex_bufs.individual.stream))
+   if (!vl_idct_init_buffer(&renderer->idct_y, &buffer->idct_y, buffer->textures.individual.y))
       return false;
 
    if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
@@ -760,14 +716,10 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    buffer->textures.individual.cr =
       renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
 
-   if (!vl_idct_init_buffer(&renderer->idct_cb, &buffer->idct_cb,
-                            buffer->textures.individual.cb,
-                            buffer->vertex_bufs.individual.stream))
+   if (!vl_idct_init_buffer(&renderer->idct_cb, &buffer->idct_cb, buffer->textures.individual.cb))
       return false;
 
-   if (!vl_idct_init_buffer(&renderer->idct_cr, &buffer->idct_cr,
-                            buffer->textures.individual.cr,
-                            buffer->vertex_bufs.individual.stream))
+   if (!vl_idct_init_buffer(&renderer->idct_cr, &buffer->idct_cr, buffer->textures.individual.cr))
       return false;
 
    for (i = 0; i < 3; ++i) {
@@ -794,13 +746,9 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
 
    for (i = 0; i < 3; ++i) {
       pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
-      pipe_resource_reference(&buffer->vertex_bufs.all[i].buffer, NULL);
       pipe_resource_reference(&buffer->textures.all[i], NULL);
    }
 
-   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
-   vl_vb_cleanup(&buffer->vertex_stream);
-
    vl_idct_cleanup_buffer(&renderer->idct_y, &buffer->idct_y);
    vl_idct_cleanup_buffer(&renderer->idct_cb, &buffer->idct_cb);
    vl_idct_cleanup_buffer(&renderer->idct_cr, &buffer->idct_cr);
@@ -818,8 +766,6 @@ vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12
    vl_idct_map_buffers(&renderer->idct_y, &buffer->idct_y);
    vl_idct_map_buffers(&renderer->idct_cr, &buffer->idct_cr);
    vl_idct_map_buffers(&renderer->idct_cb, &buffer->idct_cb);
-
-   vl_vb_map(&buffer->vertex_stream, renderer->pipe);
 }
 
 void
@@ -851,8 +797,10 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
    }
 
    for (i = 0; i < num_macroblocks; ++i) {
-      assert(mpeg12_macroblocks[i].base.codec == PIPE_VIDEO_CODEC_MPEG12);
-      grab_macroblock(renderer, buffer, &mpeg12_macroblocks[i]);
+      struct pipe_mpeg12_macroblock *mb = &mpeg12_macroblocks[i];
+
+      assert(mb->base.codec == PIPE_VIDEO_CODEC_MPEG12);
+      grab_blocks(renderer, buffer, mb->mbx, mb->mby, mb->cbp, mb->blocks);
    }
 }
 
@@ -864,32 +812,26 @@ vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg
    vl_idct_unmap_buffers(&renderer->idct_y, &buffer->idct_y);
    vl_idct_unmap_buffers(&renderer->idct_cr, &buffer->idct_cr);
    vl_idct_unmap_buffers(&renderer->idct_cb, &buffer->idct_cb);
-
-   vl_vb_unmap(&buffer->vertex_stream, renderer->pipe);
 }
 
 void
-vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
+vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
+                            unsigned not_empty_start_instance, unsigned not_empty_num_instances,
+                            unsigned empty_start_instance, unsigned empty_num_instances)
 {
-   unsigned num_not_empty, num_empty;
    assert(renderer && buffer);
 
-   num_not_empty = buffer->vertex_stream.num_not_empty;
-   num_empty = buffer->vertex_stream.num_empty;
-
-   if (num_not_empty == 0 && num_empty == 0)
+   if (not_empty_num_instances == 0 && empty_num_instances == 0)
       return;
 
-   vl_idct_flush(&renderer->idct_y, &buffer->idct_y, num_not_empty);
-   vl_idct_flush(&renderer->idct_cr, &buffer->idct_cr, num_not_empty);
-   vl_idct_flush(&renderer->idct_cb, &buffer->idct_cb, num_not_empty);
+   vl_idct_flush(&renderer->idct_y, &buffer->idct_y, not_empty_num_instances);
+   vl_idct_flush(&renderer->idct_cr, &buffer->idct_cr, not_empty_num_instances);
+   vl_idct_flush(&renderer->idct_cb, &buffer->idct_cb, not_empty_num_instances);
 
    renderer->fb_state.cbufs[0] = buffer->surface;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
-   renderer->pipe->set_vertex_buffers(renderer->pipe, 2, buffer->vertex_bufs.all);
-   renderer->pipe->bind_vertex_elements_state(renderer->pipe, renderer->vertex_elems_state);
 
    if (buffer->past) {
       buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->past);
@@ -909,12 +851,13 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs);
 
-   if (num_not_empty > 0)
-      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_not_empty);
+   if (not_empty_num_instances > 0)
+      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
+                                 not_empty_start_instance, not_empty_num_instances);
 
-   if (num_empty > 0)
+   if (empty_num_instances > 0)
       util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                                 buffer->vertex_stream.size - num_empty, num_empty);
+                                 empty_start_instance, empty_num_instances);
 
    renderer->pipe->flush(renderer->pipe, PIPE_FLUSH_RENDER_CACHE, buffer->fence);
 
@@ -922,6 +865,4 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    pipe_surface_reference(&buffer->surface, NULL);
    pipe_surface_reference(&buffer->past, NULL);
    pipe_surface_reference(&buffer->future, NULL);
-
-   vl_vb_restart(&buffer->vertex_stream);
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index c319064c70f..86a6518e3c2 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -39,13 +39,6 @@ struct pipe_context;
 struct pipe_macroblock;
 struct keymap;
 
-/* A slice is video-width (rounded up to a multiple of macroblock width) x macroblock height */
-enum VL_MPEG12_MC_RENDERER_BUFFER_MODE
-{
-   VL_MPEG12_MC_RENDERER_BUFFER_SLICE,  /* Saves memory at the cost of smaller batches */
-   VL_MPEG12_MC_RENDERER_BUFFER_PICTURE /* Larger batches, more memory */
-};
-
 struct vl_mpeg12_mc_renderer
 {
    struct pipe_context *pipe;
@@ -53,21 +46,16 @@ struct vl_mpeg12_mc_renderer
    unsigned buffer_height;
    enum pipe_video_chroma_format chroma_format;
    const unsigned (*empty_block_mask)[3][2][2];
-   enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode;
-   unsigned macroblocks_per_batch;
 
    struct pipe_viewport_state viewport;
    struct pipe_framebuffer_state fb_state;
 
    struct vl_idct idct_y, idct_cr, idct_cb;
 
-   void *vertex_elems_state;
    void *rs_state;
 
    void *vs, *fs;
 
-   struct pipe_vertex_buffer quad;
-
    union
    {
       void *all[5];
@@ -81,8 +69,6 @@ struct vl_mpeg12_mc_buffer
 {
    struct vl_idct_buffer idct_y, idct_cb, idct_cr;
 
-   struct vl_vertex_buffer vertex_stream;
-
    union
    {
       struct pipe_sampler_view *all[5];
@@ -95,14 +81,6 @@ struct vl_mpeg12_mc_buffer
       struct { struct pipe_resource *y, *cb, *cr; } individual;
    } textures;
 
-   union
-   {
-      struct pipe_vertex_buffer all[2];
-      struct {
-         struct pipe_vertex_buffer quad, stream;
-      } individual;
-   } vertex_bufs;
-
    struct pipe_surface *surface, *past, *future;
    struct pipe_fence_handle **fence;
 };
@@ -111,8 +89,7 @@ bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                                 struct pipe_context *pipe,
                                 unsigned picture_width,
                                 unsigned picture_height,
-                                enum pipe_video_chroma_format chroma_format,
-                                enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode);
+                                enum pipe_video_chroma_format chroma_format);
 
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
@@ -133,6 +110,8 @@ void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *rend
 
 void vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
 
-void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
+void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
+                                 unsigned not_empty_start_instance, unsigned not_empty_num_instances,
+                                 unsigned empty_start_instance, unsigned empty_num_instances);
 
 #endif /* vl_mpeg12_mc_renderer_h */
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 2d602b96d3f..41e9809f744 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -333,10 +333,17 @@ vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 }
 
 void
-vl_vb_restart(struct vl_vertex_buffer *buffer)
+vl_vb_restart(struct vl_vertex_buffer *buffer,
+              unsigned *not_empty_start_instance, unsigned *not_empty_num_instances,
+              unsigned *empty_start_instance, unsigned *empty_num_instances)
 {
    assert(buffer);
 
+   *not_empty_start_instance = 0;
+   *not_empty_num_instances = buffer->num_not_empty;
+   *empty_start_instance = buffer->size - buffer->num_empty;
+   *empty_num_instances = buffer->num_empty;
+
    buffer->num_not_empty = 0;
    buffer->num_empty = 0;
 }
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 4400bda6274..0f7f47f2703 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -75,7 +75,9 @@ void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macrobl
 
 void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
-void vl_vb_restart(struct vl_vertex_buffer *buffer);
+void vl_vb_restart(struct vl_vertex_buffer *buffer,
+                   unsigned *not_empty_start_instance, unsigned *not_empty_num_instances,
+                   unsigned *empty_start_instance, unsigned *empty_num_instances);
 
 void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
 
diff --git a/src/gallium/drivers/r600/r600_video_context.c b/src/gallium/drivers/r600/r600_video_context.c
index b3885db0f55..8a569cd0c65 100644
--- a/src/gallium/drivers/r600/r600_video_context.c
+++ b/src/gallium/drivers/r600/r600_video_context.c
@@ -15,7 +15,6 @@ r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
       return NULL;
 
    return sp_video_create_ex(pipe, profile, chroma_format, width, height,
-                             VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
                              true,
                              PIPE_FORMAT_VUYX);
 }
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index daebbc54ad8..e733399b59a 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -38,19 +38,28 @@
 #include "sp_public.h"
 #include "sp_texture.h"
 
+#define MACROBLOCK_WIDTH 16
+#define MACROBLOCK_HEIGHT 16
+
 #define NUM_BUFFERS 2
 
 static void
 flush_buffer(struct sp_mpeg12_context *ctx)
 {
+   unsigned ne_start, ne_num, e_start, e_num;
    assert(ctx);
 
-   if(ctx->mc_buffer != NULL) {
+   if(ctx->cur_buffer != NULL) {
+
+      vl_vb_unmap(&ctx->cur_buffer->vertex_stream, ctx->pipe);
+      vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer, &ctx->cur_buffer->mc);
+      vl_vb_restart(&ctx->cur_buffer->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
 
-      vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer, ctx->mc_buffer);
-      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, ctx->mc_buffer);
+      ctx->pipe->set_vertex_buffers(ctx->pipe, 2, ctx->cur_buffer->vertex_bufs.all);
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
+      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &ctx->cur_buffer->mc, ne_start, ne_num, e_start, e_num);
 
-      ctx->mc_buffer = NULL;
+      ctx->cur_buffer = NULL;
    }
 }
 
@@ -58,21 +67,28 @@ static void
 rotate_buffer(struct sp_mpeg12_context *ctx)
 {
    static unsigned key = 0;
-   struct vl_mpeg12_mc_buffer *buffer;
+   struct sp_mpeg12_buffer *buffer;
 
    assert(ctx);
 
    flush_buffer(ctx);
 
-   buffer = (struct vl_mpeg12_mc_buffer*)util_keymap_lookup(ctx->buffer_map, &key);
+   buffer = (struct sp_mpeg12_buffer*)util_keymap_lookup(ctx->buffer_map, &key);
    if (!buffer) {
       boolean added_to_map;
 
-      buffer = CALLOC_STRUCT(vl_mpeg12_mc_buffer);
+      buffer = CALLOC_STRUCT(sp_mpeg12_buffer);
       if (buffer == NULL)
          return;
 
-      if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, buffer)) {
+      buffer->vertex_bufs.individual.quad.stride = ctx->quads.stride;
+      buffer->vertex_bufs.individual.quad.buffer_offset = ctx->quads.buffer_offset;
+      pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, ctx->quads.buffer);
+
+      buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, ctx->pipe,
+                                                         ctx->vertex_buffer_size);
+
+      if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc)) {
          FREE(buffer);
          return;
       }
@@ -82,9 +98,10 @@ rotate_buffer(struct sp_mpeg12_context *ctx)
    }
    ++key;
    key %= NUM_BUFFERS;
-   ctx->mc_buffer = buffer;
+   ctx->cur_buffer = buffer;
 
-   vl_mpeg12_mc_map_buffer(&ctx->mc_renderer, ctx->mc_buffer);
+   vl_vb_map(&ctx->cur_buffer->vertex_stream, ctx->pipe);
+   vl_mpeg12_mc_map_buffer(&ctx->mc_renderer, &ctx->cur_buffer->mc);
 }
 
 static void
@@ -93,14 +110,15 @@ delete_buffer(const struct keymap *map,
               void *user)
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)user;
-   struct vl_mpeg12_mc_buffer *buf = (struct vl_mpeg12_mc_buffer*)data;
+   struct sp_mpeg12_buffer *buf = (struct sp_mpeg12_buffer*)data;
 
    assert(map);
    assert(key);
    assert(data);
    assert(user);
 
-   vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, buf);
+   vl_vb_cleanup(&buf->vertex_stream);
+   vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &buf->mc);
 }
 
 static void
@@ -124,6 +142,8 @@ sp_mpeg12_destroy(struct pipe_video_context *vpipe)
    vl_compositor_cleanup(&ctx->compositor);
    util_delete_keymap(ctx->buffer_map, ctx);
    vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
+   pipe_resource_reference(&ctx->quads.buffer, NULL);
    ctx->pipe->destroy(ctx->pipe);
 
    FREE(ctx);
@@ -194,16 +214,20 @@ sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
 {
    struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
    struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks;
+   unsigned i;
 
    assert(vpipe);
    assert(num_macroblocks);
    assert(macroblocks);
    assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
    assert(ctx->decode_target);
-   assert(ctx->mc_buffer);
+   assert(ctx->cur_buffer);
+
+   for ( i = 0; i < num_macroblocks; ++i )
+      vl_vb_add_block(&ctx->cur_buffer->vertex_stream, &mpeg12_macroblocks[i], ctx->mc_renderer.empty_block_mask);
 
    vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer,
-                                            ctx->mc_buffer,
+                                            &ctx->cur_buffer->mc,
                                             ctx->decode_target,
                                             past, future, num_macroblocks,
                                             mpeg12_macroblocks, fence);
@@ -411,7 +435,7 @@ sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
    assert(vpipe);
    assert(dt);
 
-   if (ctx->decode_target != dt || ctx->mc_buffer == NULL) {
+   if (ctx->decode_target != dt || ctx->cur_buffer == NULL) {
       rotate_buffer(ctx);
 
       pipe_surface_reference(&ctx->decode_target, dt);
@@ -466,7 +490,7 @@ init_pipe_state(struct sp_mpeg12_context *ctx)
    rast.offset_units = 1;
    rast.offset_scale = 1;
    rast.gl_rasterization_rules = 1;
-   
+
    ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
    ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
 
@@ -514,7 +538,6 @@ static struct pipe_video_context *
 sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
                  enum pipe_video_chroma_format chroma_format,
                  unsigned width, unsigned height,
-                 enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
                  bool pot_buffers,
                  enum pipe_format decode_format)
 {
@@ -531,8 +554,8 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
    /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
    assert(pot_buffers);
 
-   buffer_width = pot_buffers ? util_next_power_of_two(width) : width; 
-   buffer_height = pot_buffers ? util_next_power_of_two(height) : height; 
+   buffer_width = pot_buffers ? util_next_power_of_two(width) : width;
+   buffer_height = pot_buffers ? util_next_power_of_two(height) : height;
 
    ctx->base.profile = profile;
    ctx->base.chroma_format = chroma_format;
@@ -564,9 +587,18 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
    ctx->pipe = pipe;
    ctx->decode_format = decode_format;
 
+   ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2);
+   ctx->vertex_buffer_size = width / MACROBLOCK_WIDTH * height / MACROBLOCK_HEIGHT;
+   ctx->vertex_elems_state = vl_vb_get_elems_state(ctx->pipe, true);
+
+   if (ctx->vertex_elems_state == NULL) {
+      ctx->pipe->destroy(ctx->pipe);
+      FREE(ctx);
+      return NULL;
+   }
+
    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
-                                   buffer_width, buffer_height, chroma_format,
-                                   bufmode)) {
+                                   buffer_width, buffer_height, chroma_format)) {
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
@@ -618,7 +650,6 @@ sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
    return sp_video_create_ex(pipe, profile,
                              chroma_format,
                              width, height,
-                             VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
                              true,
                              PIPE_FORMAT_XYUV);
 }
@@ -627,7 +658,6 @@ struct pipe_video_context *
 sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
                    enum pipe_video_chroma_format chroma_format,
                    unsigned width, unsigned height,
-                   enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
                    bool pot_buffers,
                    enum pipe_format decode_format)
 {
@@ -639,7 +669,6 @@ sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
          return sp_mpeg12_create(pipe, profile,
                                  chroma_format,
                                  width, height,
-                                 bufmode,
                                  pot_buffers,
                                  decode_format);
       default:
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
index 9b60bad6b8f..7e04a6d3bc2 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.h
+++ b/src/gallium/drivers/softpipe/sp_video_context.h
@@ -35,14 +35,35 @@
 struct pipe_screen;
 struct pipe_context;
 
+struct sp_mpeg12_buffer
+{
+   struct vl_vertex_buffer vertex_stream;
+
+   union
+   {
+      struct pipe_vertex_buffer all[2];
+      struct {
+         struct pipe_vertex_buffer quad, stream;
+      } individual;
+   } vertex_bufs;
+
+   struct vl_mpeg12_mc_buffer mc;
+};
+
 struct sp_mpeg12_context
 {
    struct pipe_video_context base;
    struct pipe_context *pipe;
    struct pipe_surface *decode_target;
+
+   struct pipe_vertex_buffer quads;
+   unsigned vertex_buffer_size;
+   void *vertex_elems_state;
+
    struct vl_mpeg12_mc_renderer mc_renderer;
+
    struct keymap *buffer_map;
-   struct vl_mpeg12_mc_buffer *mc_buffer;
+   struct sp_mpeg12_buffer *cur_buffer;
    struct vl_compositor compositor;
 
    void *rast;
@@ -63,7 +84,6 @@ struct pipe_video_context *
 sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
                    enum pipe_video_chroma_format chroma_format,
                    unsigned width, unsigned height,
-                   enum VL_MPEG12_MC_RENDERER_BUFFER_MODE bufmode,
                    bool pot_buffers,
                    enum pipe_format decode_format);
 
-- 
cgit v1.2.3


From 0f07da0a1c87e1c7b53700c33d6b1f8f03c1fe11 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 9 Mar 2011 23:40:08 +0100
Subject: [g3dvl] move idct out of mc code

iDCT and MC now look good, but sp_video_context is a total mess
---
 src/gallium/auxiliary/vl/vl_idct.c               |  21 ++-
 src/gallium/auxiliary/vl/vl_idct.h               |   3 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 205 +++--------------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  25 +--
 src/gallium/drivers/softpipe/sp_video_context.c  | 128 ++++++++++++--
 src/gallium/drivers/softpipe/sp_video_context.h  |   3 +
 6 files changed, 161 insertions(+), 224 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index f191a898edd..e0307376a38 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -476,6 +476,15 @@ init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
       buffer->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, buffer->textures.all[i], &sampler_view);
    }
 
+   template.target = PIPE_TEXTURE_2D;
+   /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
+   template.format = PIPE_FORMAT_R16_SNORM;
+   template.width0 = idct->buffer_width;
+   template.height0 = idct->buffer_height;
+   template.depth0 = 1;
+
+   buffer->destination = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+
    return true;
 }
 
@@ -577,9 +586,8 @@ vl_idct_cleanup(struct vl_idct *idct)
    pipe_resource_reference(&idct->matrix, NULL);
 }
 
-bool
-vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
-                    struct pipe_resource *dst)
+struct pipe_resource *
+vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
    struct pipe_surface template;
 
@@ -587,14 +595,12 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
 
    assert(buffer);
    assert(idct);
-   assert(dst);
 
    pipe_resource_reference(&buffer->textures.individual.matrix, idct->matrix);
    pipe_resource_reference(&buffer->textures.individual.transpose, idct->matrix);
-   pipe_resource_reference(&buffer->destination, dst);
 
    if (!init_textures(idct, buffer))
-      return false;
+      return NULL;
 
    /* init state */
    buffer->viewport[0].scale[0] = buffer->textures.individual.intermediate->width0;
@@ -640,7 +646,7 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
       buffer->fb_state[i].zsbuf = NULL;
    }
 
-   return true;
+   return buffer->destination;
 }
 
 void
@@ -685,7 +691,6 @@ vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 void
 vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block)
 {
-   //struct vertex2s v;
    unsigned tex_pitch;
    short *texels;
 
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 0ff12cf466d..264ad2bd226 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -97,8 +97,7 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
 
 void vl_idct_cleanup(struct vl_idct *idct);
 
-bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
-                         struct pipe_resource *dst);
+struct pipe_resource *vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
 void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 160388ad049..5b675349481 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -481,68 +481,6 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
    r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
 }
 
-static bool
-init_buffers(struct vl_mpeg12_mc_renderer *r)
-{
-   struct pipe_resource *idct_matrix;
-   unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
-
-   assert(r);
-
-   if (!(idct_matrix = vl_idct_upload_matrix(r->pipe)))
-      return false;
-
-   if (!vl_idct_init(&r->idct_y, r->pipe, r->buffer_width, r->buffer_height,
-                     2, 2, TGSI_SWIZZLE_X, idct_matrix))
-      return false;
-
-   if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      chroma_width = r->buffer_width / 2;
-      chroma_height = r->buffer_height / 2;
-      chroma_blocks_x = 1;
-      chroma_blocks_y = 1;
-   } else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
-      chroma_width = r->buffer_width;
-      chroma_height = r->buffer_height / 2;
-      chroma_blocks_x = 2;
-      chroma_blocks_y = 1;
-   } else {
-      chroma_width = r->buffer_width;
-      chroma_height = r->buffer_height;
-      chroma_blocks_x = 2;
-      chroma_blocks_y = 2;
-   }
-
-   if(!vl_idct_init(&r->idct_cr, r->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
-      return false;
-
-   if(!vl_idct_init(&r->idct_cb, r->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
-      return false;
-
-   r->vs = create_vert_shader(r);
-   r->fs = create_frag_shader(r);
-
-   if (r->vs == NULL || r->fs == NULL)
-      return false;
-
-   return true;
-}
-
-static void
-cleanup_buffers(struct vl_mpeg12_mc_renderer *r)
-{
-   assert(r);
-
-   r->pipe->delete_vs_state(r->pipe, r->vs);
-   r->pipe->delete_fs_state(r->pipe, r->fs);
-
-   vl_idct_cleanup(&r->idct_y);
-   vl_idct_cleanup(&r->idct_cr);
-   vl_idct_cleanup(&r->idct_cb);
-}
-
 static struct pipe_sampler_view
 *find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
 {
@@ -570,41 +508,6 @@ static struct pipe_sampler_view
    return sampler_view;
 }
 
-static void
-grab_blocks(struct vl_mpeg12_mc_renderer *r,
-            struct vl_mpeg12_mc_buffer *buffer,
-            unsigned mbx, unsigned mby,
-            unsigned cbp, short *blocks)
-{
-   unsigned tb = 0;
-   unsigned x, y;
-
-   assert(r);
-   assert(blocks);
-
-   for (y = 0; y < 2; ++y) {
-      for (x = 0; x < 2; ++x, ++tb) {
-         if (cbp & (*r->empty_block_mask)[0][y][x]) {
-            vl_idct_add_block(&buffer->idct_y, mbx * 2 + x, mby * 2 + y, blocks);
-            blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
-         }
-      }
-   }
-
-   /* TODO: Implement 422, 444 */
-   assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-
-   for (tb = 1; tb < 3; ++tb) {
-      if (cbp & (*r->empty_block_mask)[tb][0][0]) {
-         if(tb == 1)
-            vl_idct_add_block(&buffer->idct_cb, mbx, mby, blocks);
-         else
-            vl_idct_add_block(&buffer->idct_cr, mbx, mby, blocks);
-         blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
-      }
-   }
-}
-
 static void
 texview_map_delete(const struct keymap *map,
                    const void *key, void *data,
@@ -649,12 +552,15 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (!init_pipe_state(renderer))
       goto error_pipe_state;
 
-   if (!init_buffers(renderer))
-      goto error_buffers;
+   renderer->vs = create_vert_shader(renderer);
+   renderer->fs = create_frag_shader(renderer);
+
+   if (renderer->vs == NULL || renderer->fs == NULL)
+      goto error_shaders;
 
    return true;
 
-error_buffers:
+error_shaders:
    cleanup_pipe_state(renderer);
 
 error_pipe_state:
@@ -669,13 +575,15 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 
    util_delete_keymap(renderer->texview_map, renderer->pipe);
    cleanup_pipe_state(renderer);
-   cleanup_buffers(renderer);
+
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs);
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs);
 }
 
 bool
-vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
+vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
+                         struct pipe_resource *y, struct pipe_resource *cr, struct pipe_resource *cb)
 {
-   struct pipe_resource template;
    struct pipe_sampler_view sampler_view;
 
    unsigned i;
@@ -686,41 +594,9 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    buffer->past = NULL;
    buffer->future = NULL;
 
-   memset(&template, 0, sizeof(struct pipe_resource));
-   template.target = PIPE_TEXTURE_2D;
-   /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
-   template.format = PIPE_FORMAT_R16_SNORM;
-   template.last_level = 0;
-   template.width0 = renderer->buffer_width;
-   template.height0 = renderer->buffer_height;
-   template.depth0 = 1;
-   template.array_size = 1;
-   template.usage = PIPE_USAGE_STATIC;
-   template.bind = PIPE_BIND_SAMPLER_VIEW;
-   template.flags = 0;
-
-   buffer->textures.individual.y = renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
-
-   if (!vl_idct_init_buffer(&renderer->idct_y, &buffer->idct_y, buffer->textures.individual.y))
-      return false;
-
-   if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      template.width0 = renderer->buffer_width / 2;
-      template.height0 = renderer->buffer_height / 2;
-   }
-   else if (renderer->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
-      template.height0 = renderer->buffer_height / 2;
-
-   buffer->textures.individual.cb =
-      renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
-   buffer->textures.individual.cr =
-      renderer->pipe->screen->resource_create(renderer->pipe->screen, &template);
-
-   if (!vl_idct_init_buffer(&renderer->idct_cb, &buffer->idct_cb, buffer->textures.individual.cb))
-      return false;
-
-   if (!vl_idct_init_buffer(&renderer->idct_cr, &buffer->idct_cr, buffer->textures.individual.cr))
-      return false;
+   pipe_resource_reference(&buffer->textures.individual.y, y);
+   pipe_resource_reference(&buffer->textures.individual.cr, cr);
+   pipe_resource_reference(&buffer->textures.individual.cb, cb);
 
    for (i = 0; i < 3; ++i) {
       u_sampler_view_default_template(&sampler_view,
@@ -749,41 +625,21 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
       pipe_resource_reference(&buffer->textures.all[i], NULL);
    }
 
-   vl_idct_cleanup_buffer(&renderer->idct_y, &buffer->idct_y);
-   vl_idct_cleanup_buffer(&renderer->idct_cb, &buffer->idct_cb);
-   vl_idct_cleanup_buffer(&renderer->idct_cr, &buffer->idct_cr);
-
    pipe_surface_reference(&buffer->surface, NULL);
    pipe_surface_reference(&buffer->past, NULL);
    pipe_surface_reference(&buffer->future, NULL);
 }
 
 void
-vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
+vl_mpeg12_mc_set_surfaces(struct vl_mpeg12_mc_renderer *renderer,
+                          struct vl_mpeg12_mc_buffer *buffer,
+                          struct pipe_surface *surface,
+                          struct pipe_surface *past,
+                          struct pipe_surface *future,
+                          struct pipe_fence_handle **fence)
 {
-   assert(renderer && buffer);
-
-   vl_idct_map_buffers(&renderer->idct_y, &buffer->idct_y);
-   vl_idct_map_buffers(&renderer->idct_cr, &buffer->idct_cr);
-   vl_idct_map_buffers(&renderer->idct_cb, &buffer->idct_cb);
-}
-
-void
-vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
-                                         struct vl_mpeg12_mc_buffer *buffer,
-                                         struct pipe_surface *surface,
-                                         struct pipe_surface *past,
-                                         struct pipe_surface *future,
-                                         unsigned num_macroblocks,
-                                         struct pipe_mpeg12_macroblock *mpeg12_macroblocks,
-                                         struct pipe_fence_handle **fence)
-{
-   unsigned i;
-
    assert(renderer && buffer);
    assert(surface);
-   assert(num_macroblocks);
-   assert(mpeg12_macroblocks);
 
    if (surface != buffer->surface) {
       pipe_surface_reference(&buffer->surface, surface);
@@ -795,23 +651,6 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
       assert(buffer->past == past);
       assert(buffer->future == future);
    }
-
-   for (i = 0; i < num_macroblocks; ++i) {
-      struct pipe_mpeg12_macroblock *mb = &mpeg12_macroblocks[i];
-
-      assert(mb->base.codec == PIPE_VIDEO_CODEC_MPEG12);
-      grab_blocks(renderer, buffer, mb->mbx, mb->mby, mb->cbp, mb->blocks);
-   }
-}
-
-void
-vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
-{
-   assert(renderer && buffer);
-
-   vl_idct_unmap_buffers(&renderer->idct_y, &buffer->idct_y);
-   vl_idct_unmap_buffers(&renderer->idct_cr, &buffer->idct_cr);
-   vl_idct_unmap_buffers(&renderer->idct_cb, &buffer->idct_cb);
 }
 
 void
@@ -824,10 +663,6 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    if (not_empty_num_instances == 0 && empty_num_instances == 0)
       return;
 
-   vl_idct_flush(&renderer->idct_y, &buffer->idct_y, not_empty_num_instances);
-   vl_idct_flush(&renderer->idct_cr, &buffer->idct_cr, not_empty_num_instances);
-   vl_idct_flush(&renderer->idct_cb, &buffer->idct_cb, not_empty_num_instances);
-
    renderer->fb_state.cbufs[0] = buffer->surface;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 86a6518e3c2..db8f2fff749 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -50,8 +50,6 @@ struct vl_mpeg12_mc_renderer
    struct pipe_viewport_state viewport;
    struct pipe_framebuffer_state fb_state;
 
-   struct vl_idct idct_y, idct_cr, idct_cb;
-
    void *rs_state;
 
    void *vs, *fs;
@@ -67,8 +65,6 @@ struct vl_mpeg12_mc_renderer
 
 struct vl_mpeg12_mc_buffer
 {
-   struct vl_idct_buffer idct_y, idct_cb, idct_cr;
-
    union
    {
       struct pipe_sampler_view *all[5];
@@ -93,22 +89,17 @@ bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
 
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
-bool vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
+bool vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
+                              struct pipe_resource *y, struct pipe_resource *cr, struct pipe_resource *cb);
 
 void vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
 
-void vl_mpeg12_mc_map_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
-
-void vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer *renderer,
-                                              struct vl_mpeg12_mc_buffer *buffer,
-                                              struct pipe_surface *surface,
-                                              struct pipe_surface *past,
-                                              struct pipe_surface *future,
-                                              unsigned num_macroblocks,
-                                              struct pipe_mpeg12_macroblock *mpeg12_macroblocks,
-                                              struct pipe_fence_handle **fence);
-
-void vl_mpeg12_mc_unmap_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
+void vl_mpeg12_mc_set_surfaces(struct vl_mpeg12_mc_renderer *renderer,
+                               struct vl_mpeg12_mc_buffer *buffer,
+                               struct pipe_surface *surface,
+                               struct pipe_surface *past,
+                               struct pipe_surface *future,
+                               struct pipe_fence_handle **fence);
 
 void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
                                  unsigned not_empty_start_instance, unsigned not_empty_num_instances,
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index e733399b59a..32398e4ff08 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -29,6 +29,7 @@
 #include "util/u_memory.h"
 
 #include "sp_video_context.h"
+#include <pipe/p_shader_tokens.h>
 #include <util/u_inlines.h>
 #include <util/u_memory.h>
 #include <util/u_keymap.h>
@@ -40,6 +41,8 @@
 
 #define MACROBLOCK_WIDTH 16
 #define MACROBLOCK_HEIGHT 16
+#define BLOCK_WIDTH 8
+#define BLOCK_HEIGHT 8
 
 #define NUM_BUFFERS 2
 
@@ -52,12 +55,19 @@ flush_buffer(struct sp_mpeg12_context *ctx)
    if(ctx->cur_buffer != NULL) {
 
       vl_vb_unmap(&ctx->cur_buffer->vertex_stream, ctx->pipe);
-      vl_mpeg12_mc_unmap_buffer(&ctx->mc_renderer, &ctx->cur_buffer->mc);
-      vl_vb_restart(&ctx->cur_buffer->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
+      vl_idct_unmap_buffers(&ctx->idct_y, &ctx->cur_buffer->idct_y);
+      vl_idct_unmap_buffers(&ctx->idct_cr, &ctx->cur_buffer->idct_cr);
+      vl_idct_unmap_buffers(&ctx->idct_cb, &ctx->cur_buffer->idct_cb);
+      vl_vb_restart(&ctx->cur_buffer->vertex_stream,
+		    &ne_start, &ne_num, &e_start, &e_num);
 
       ctx->pipe->set_vertex_buffers(ctx->pipe, 2, ctx->cur_buffer->vertex_bufs.all);
       ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
-      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &ctx->cur_buffer->mc, ne_start, ne_num, e_start, e_num);
+      vl_idct_flush(&ctx->idct_y, &ctx->cur_buffer->idct_y, ne_num);
+      vl_idct_flush(&ctx->idct_cr, &ctx->cur_buffer->idct_cr, ne_num);
+      vl_idct_flush(&ctx->idct_cb, &ctx->cur_buffer->idct_cb, ne_num);
+      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &ctx->cur_buffer->mc,
+                                  ne_start, ne_num, e_start, e_num);
 
       ctx->cur_buffer = NULL;
    }
@@ -66,6 +76,7 @@ flush_buffer(struct sp_mpeg12_context *ctx)
 static void
 rotate_buffer(struct sp_mpeg12_context *ctx)
 {
+   struct pipe_resource *y, *cr, *cb;
    static unsigned key = 0;
    struct sp_mpeg12_buffer *buffer;
 
@@ -87,8 +98,22 @@ rotate_buffer(struct sp_mpeg12_context *ctx)
 
       buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, ctx->pipe,
                                                          ctx->vertex_buffer_size);
+      if (!(y = vl_idct_init_buffer(&ctx->idct_y, &buffer->idct_y))) {
+         FREE(buffer);
+         return;
+      }
+
+      if (!(cr = vl_idct_init_buffer(&ctx->idct_cr, &buffer->idct_cr))) {
+         FREE(buffer);
+         return;
+      }
 
-      if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc)) {
+      if (!(cb = vl_idct_init_buffer(&ctx->idct_cb, &buffer->idct_cb))) {
+         FREE(buffer);
+         return;
+      }
+
+      if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc, y, cr, cb)) {
          FREE(buffer);
          return;
       }
@@ -101,7 +126,9 @@ rotate_buffer(struct sp_mpeg12_context *ctx)
    ctx->cur_buffer = buffer;
 
    vl_vb_map(&ctx->cur_buffer->vertex_stream, ctx->pipe);
-   vl_mpeg12_mc_map_buffer(&ctx->mc_renderer, &ctx->cur_buffer->mc);
+   vl_idct_map_buffers(&ctx->idct_y, &ctx->cur_buffer->idct_y);
+   vl_idct_map_buffers(&ctx->idct_cr, &ctx->cur_buffer->idct_cr);
+   vl_idct_map_buffers(&ctx->idct_cb, &ctx->cur_buffer->idct_cb);
 }
 
 static void
@@ -118,9 +145,49 @@ delete_buffer(const struct keymap *map,
    assert(user);
 
    vl_vb_cleanup(&buf->vertex_stream);
+   vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y);
+   vl_idct_cleanup_buffer(&ctx->idct_cb, &buf->idct_cb);
+   vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr);
    vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &buf->mc);
 }
 
+/* Hand the coded 8x8 blocks of one macroblock to the y/cb/cr IDCT buffers (4:2:0 only). */
+static void
+upload_buffer(struct sp_mpeg12_context *ctx,
+              struct sp_mpeg12_buffer *buffer,
+              struct pipe_mpeg12_macroblock *mb)
+{
+   short *blocks;
+   unsigned tb, x, y;
+
+   assert(ctx && buffer && mb);
+
+   /* blocks only advances past blocks whose bit is set in mb->cbp */
+   blocks = mb->blocks;
+   /* luma: 2x2 blocks per macroblock; tb is not used here (it was incremented uninitialized) */
+   for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+         if (mb->cbp & (*ctx->mc_renderer.empty_block_mask)[0][y][x]) {
+            vl_idct_add_block(&buffer->idct_y, mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
+            blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
+         }
+      }
+   }
+
+   /* TODO: Implement 422, 444 */
+   assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+   /* chroma: one block per plane, mask index 1 goes to cb, index 2 goes to cr */
+   for (tb = 1; tb < 3; ++tb) {
+      if (mb->cbp & (*ctx->mc_renderer.empty_block_mask)[tb][0][0]) {
+         if(tb == 1)
+            vl_idct_add_block(&buffer->idct_cb, mb->mbx, mb->mby, blocks);
+         else
+            vl_idct_add_block(&buffer->idct_cr, mb->mbx, mb->mby, blocks);
+         blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
+      }
+   }
+}
+
 static void
 sp_mpeg12_destroy(struct pipe_video_context *vpipe)
 {
@@ -142,6 +209,9 @@ sp_mpeg12_destroy(struct pipe_video_context *vpipe)
    vl_compositor_cleanup(&ctx->compositor);
    util_delete_keymap(ctx->buffer_map, ctx);
    vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+   vl_idct_cleanup(&ctx->idct_y);
+   vl_idct_cleanup(&ctx->idct_cr);
+   vl_idct_cleanup(&ctx->idct_cb);
    ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
    pipe_resource_reference(&ctx->quads.buffer, NULL);
    ctx->pipe->destroy(ctx->pipe);
@@ -223,14 +293,14 @@ sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
    assert(ctx->decode_target);
    assert(ctx->cur_buffer);
 
-   for ( i = 0; i < num_macroblocks; ++i )
-      vl_vb_add_block(&ctx->cur_buffer->vertex_stream, &mpeg12_macroblocks[i], ctx->mc_renderer.empty_block_mask);
+   for ( i = 0; i < num_macroblocks; ++i ) {
+      vl_vb_add_block(&ctx->cur_buffer->vertex_stream, &mpeg12_macroblocks[i],
+                      ctx->mc_renderer.empty_block_mask);
+      upload_buffer(ctx, ctx->cur_buffer, &mpeg12_macroblocks[i]);
+   }
 
-   vl_mpeg12_mc_renderer_render_macroblocks(&ctx->mc_renderer,
-                                            &ctx->cur_buffer->mc,
-                                            ctx->decode_target,
-                                            past, future, num_macroblocks,
-                                            mpeg12_macroblocks, fence);
+   vl_mpeg12_mc_set_surfaces(&ctx->mc_renderer, &ctx->cur_buffer->mc,
+                             ctx->decode_target, past, future, fence);
 }
 
 static void
@@ -541,7 +611,9 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
                  bool pot_buffers,
                  enum pipe_format decode_format)
 {
+   struct pipe_resource *idct_matrix;
    unsigned buffer_width, buffer_height;
+   unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
    struct sp_mpeg12_context *ctx;
 
    assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
@@ -597,6 +669,38 @@ sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
       return NULL;
    }
 
+   if (!(idct_matrix = vl_idct_upload_matrix(ctx->pipe)))
+      return false;
+
+   if (!vl_idct_init(&ctx->idct_y, ctx->pipe, buffer_width, buffer_height,
+                     2, 2, TGSI_SWIZZLE_X, idct_matrix))
+      return false;
+
+   if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      chroma_width = buffer_width / 2;
+      chroma_height = buffer_height / 2;
+      chroma_blocks_x = 1;
+      chroma_blocks_y = 1;
+   } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      chroma_width = buffer_width;
+      chroma_height = buffer_height / 2;
+      chroma_blocks_x = 2;
+      chroma_blocks_y = 1;
+   } else {
+      chroma_width = buffer_width;
+      chroma_height = buffer_height;
+      chroma_blocks_x = 2;
+      chroma_blocks_y = 2;
+   }
+
+   if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height,
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
+      return false;
+
+   if(!vl_idct_init(&ctx->idct_cb, ctx->pipe, chroma_width, chroma_height,
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
+      return false;
+
    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
                                    buffer_width, buffer_height, chroma_format)) {
       ctx->pipe->destroy(ctx->pipe);
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
index 7e04a6d3bc2..2e3e4ec729f 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.h
+++ b/src/gallium/drivers/softpipe/sp_video_context.h
@@ -47,6 +47,8 @@ struct sp_mpeg12_buffer
       } individual;
    } vertex_bufs;
 
+   struct vl_idct_buffer idct_y, idct_cb, idct_cr;
+
    struct vl_mpeg12_mc_buffer mc;
 };
 
@@ -60,6 +62,7 @@ struct sp_mpeg12_context
    unsigned vertex_buffer_size;
    void *vertex_elems_state;
 
+   struct vl_idct idct_y, idct_cr, idct_cb;
    struct vl_mpeg12_mc_renderer mc_renderer;
 
    struct keymap *buffer_map;
-- 
cgit v1.2.3


From e87bd8c9578dee384ff03039aa792e1a8dae7f36 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 16 Mar 2011 23:09:52 +0100
Subject: [g3dvl] cleanup and documentation

---
 src/gallium/auxiliary/vl/vl_defines.h            | 38 ++++++++++++++++++++++++
 src/gallium/auxiliary/vl/vl_idct.c               |  8 ++---
 src/gallium/auxiliary/vl/vl_idct.h               | 13 ++++++++
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  7 ++---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  2 --
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     |  4 ---
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     |  7 ++++-
 src/gallium/drivers/softpipe/sp_video_context.c  |  8 +----
 src/gallium/drivers/softpipe/sp_video_context.h  | 35 +++++++++++-----------
 9 files changed, 82 insertions(+), 40 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_defines.h

diff --git a/src/gallium/auxiliary/vl/vl_defines.h b/src/gallium/auxiliary/vl/vl_defines.h
new file mode 100644
index 00000000000..668991f904f
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_defines.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_defines_h
+#define vl_defines_h
+
+/* constants usually used with all known codecs */
+#define MACROBLOCK_WIDTH 16
+#define MACROBLOCK_HEIGHT 16
+
+#define BLOCK_WIDTH 8
+#define BLOCK_HEIGHT 8
+
+#endif
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index e0307376a38..6b0010a04bb 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -27,6 +27,7 @@
 
 #include "vl_idct.h"
 #include "vl_vertex_buffers.h"
+#include "vl_defines.h"
 #include "util/u_draw.h"
 #include <assert.h>
 #include <pipe/p_context.h>
@@ -37,9 +38,6 @@
 #include <tgsi/tgsi_ureg.h>
 #include "vl_types.h"
 
-#define BLOCK_WIDTH 8
-#define BLOCK_HEIGHT 8
-
 #define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
 #define NR_RENDER_TARGETS 4
@@ -504,6 +502,8 @@ cleanup_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 struct pipe_resource *
 vl_idct_upload_matrix(struct pipe_context *pipe)
 {
+   const float scale = sqrtf(SCALE_FACTOR_16_TO_9);
+
    struct pipe_resource template, *matrix;
    struct pipe_transfer *buf_transfer;
    unsigned i, j, pitch;
@@ -544,7 +544,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
          // transpose and scale
-         f[i * pitch + j] = const_matrix[j][i] * sqrtf(SCALE_FACTOR_16_TO_9);
+         f[i * pitch + j] = const_matrix[j][i] * scale;
 
    pipe->transfer_unmap(pipe, buf_transfer);
    pipe->transfer_destroy(pipe, buf_transfer);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 264ad2bd226..913034e7ab4 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -31,6 +31,9 @@
 #include <pipe/p_state.h>
 #include "vl_vertex_buffers.h"
 
+/* shader based inverse discrete cosine transform (IDCT);
+ * expects vl_vertex_buffers to be used as a todo list
+ */
 struct vl_idct
 {
    struct pipe_context *pipe;
@@ -57,6 +60,7 @@ struct vl_idct
    struct pipe_resource *matrix;
 };
 
+/* a set of buffers to work with */
 struct vl_idct_buffer
 {
    struct pipe_viewport_state viewport[2];
@@ -88,25 +92,34 @@ struct vl_idct_buffer
    short *texels;
 };
 
+/* upload the idct matrix, which can be shared by all idct instances of a pipe */
 struct pipe_resource *vl_idct_upload_matrix(struct pipe_context *pipe);
 
+/* init an idct instance */
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
                   unsigned blocks_x, unsigned blocks_y,
                   int color_swizzle, struct pipe_resource *matrix);
 
+/* destroy an idct instance */
 void vl_idct_cleanup(struct vl_idct *idct);
 
+/* init a buffer associated with a given idct instance */
 struct pipe_resource *vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
+/* cleanup a buffer of an idct instance */
 void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
+/* map a buffer for use with vl_idct_add_block */
 void vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
+/* add a block of data to be transformed at the given x and y coordinates */
 void vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block);
 
+/* unmaps the buffers before flushing */
 void vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
+/* flush the buffer and start rendering; vertex buffers need to be set up before calling this */
 void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_verts);
 
 #endif
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 5b675349481..df3373ebd88 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -26,6 +26,8 @@
  **************************************************************************/
 
 #include "vl_mpeg12_mc_renderer.h"
+#include "vl_vertex_buffers.h"
+#include "vl_defines.h"
 #include "util/u_draw.h"
 #include <assert.h>
 #include <pipe/p_context.h>
@@ -38,11 +40,6 @@
 #include <util/u_draw.h>
 #include <tgsi/tgsi_ureg.h>
 
-#define MACROBLOCK_WIDTH 16
-#define MACROBLOCK_HEIGHT 16
-#define BLOCK_WIDTH 8
-#define BLOCK_HEIGHT 8
-
 enum VS_OUTPUT
 {
    VS_O_VPOS,
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index db8f2fff749..805fec530b0 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -32,8 +32,6 @@
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
 #include "vl_types.h"
-#include "vl_idct.h"
-#include "vl_vertex_buffers.h"
 
 struct pipe_context;
 struct pipe_macroblock;
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 41e9809f744..c5366e8e4fc 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -26,10 +26,6 @@
  **************************************************************************/
 
 #include <assert.h>
-#include <pipe/p_context.h>
-#include <pipe/p_screen.h>
-#include <util/u_memory.h>
-#include <util/u_inlines.h>
 #include <util/u_format.h>
 #include "vl_vertex_buffers.h"
 #include "vl_types.h"
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 0f7f47f2703..88e0270c170 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -27,11 +27,16 @@
 #ifndef vl_vertex_buffers_h
 #define vl_vertex_buffers_h
 
-#include <assert.h>
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
 #include "vl_types.h"
 
+/* vertex buffers act as a todo list,
+ * uploading all the useful information to video ram
+ * so a vertex shader can work with it
+ */
+
+/* inputs to the vertex shaders */
 enum VS_INPUT
 {
    VS_I_RECT,
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
index 32398e4ff08..7e3519e694f 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ b/src/gallium/drivers/softpipe/sp_video_context.c
@@ -36,13 +36,7 @@
 #include <util/u_rect.h>
 #include <util/u_video.h>
 #include <util/u_surface.h>
-#include "sp_public.h"
-#include "sp_texture.h"
-
-#define MACROBLOCK_WIDTH 16
-#define MACROBLOCK_HEIGHT 16
-#define BLOCK_WIDTH 8
-#define BLOCK_HEIGHT 8
+#include <vl/vl_defines.h>
 
 #define NUM_BUFFERS 2
 
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
index 2e3e4ec729f..04368a64a53 100644
--- a/src/gallium/drivers/softpipe/sp_video_context.h
+++ b/src/gallium/drivers/softpipe/sp_video_context.h
@@ -29,29 +29,13 @@
 #define SP_VIDEO_CONTEXT_H
 
 #include <pipe/p_video_context.h>
+#include <vl/vl_idct.h>
 #include <vl/vl_mpeg12_mc_renderer.h>
 #include <vl/vl_compositor.h>
 
 struct pipe_screen;
 struct pipe_context;
 
-struct sp_mpeg12_buffer
-{
-   struct vl_vertex_buffer vertex_stream;
-
-   union
-   {
-      struct pipe_vertex_buffer all[2];
-      struct {
-         struct pipe_vertex_buffer quad, stream;
-      } individual;
-   } vertex_bufs;
-
-   struct vl_idct_buffer idct_y, idct_cb, idct_cr;
-
-   struct vl_mpeg12_mc_buffer mc;
-};
-
 struct sp_mpeg12_context
 {
    struct pipe_video_context base;
@@ -76,6 +60,23 @@ struct sp_mpeg12_context
    enum pipe_format decode_format;
 };
 
+struct sp_mpeg12_buffer
+{
+   struct vl_vertex_buffer vertex_stream;
+
+   union
+   {
+      struct pipe_vertex_buffer all[2];
+      struct {
+         struct pipe_vertex_buffer quad, stream;
+      } individual;
+   } vertex_bufs;
+
+   struct vl_idct_buffer idct_y, idct_cb, idct_cr;
+
+   struct vl_mpeg12_mc_buffer mc;
+};
+
 struct pipe_video_context *
 sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
                 enum pipe_video_chroma_format chroma_format,
-- 
cgit v1.2.3


From a1fecd09c2c5fdba3da5c38fce567b5bd2e6053d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 17 Mar 2011 00:08:20 +0100
Subject: [g3dvl] move mpeg12 context out of softpipe

---
 src/gallium/auxiliary/Makefile                  |   1 +
 src/gallium/auxiliary/vl/vl_mpeg12_context.c    | 732 ++++++++++++++++++++++
 src/gallium/auxiliary/vl/vl_mpeg12_context.h    |  90 +++
 src/gallium/drivers/nv40/nv40_video_context.c   |  16 +-
 src/gallium/drivers/nvfx/nvfx_video_context.c   |  17 +-
 src/gallium/drivers/r600/r600_video_context.c   |  43 +-
 src/gallium/drivers/softpipe/Makefile           |   3 +-
 src/gallium/drivers/softpipe/sp_screen.c        |  40 +-
 src/gallium/drivers/softpipe/sp_video_context.c | 775 ------------------------
 src/gallium/drivers/softpipe/sp_video_context.h |  94 ---
 src/gallium/targets/xvmc-r600/Makefile          |   1 -
 11 files changed, 921 insertions(+), 891 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_context.c
 create mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_context.h
 delete mode 100644 src/gallium/drivers/softpipe/sp_video_context.c
 delete mode 100644 src/gallium/drivers/softpipe/sp_video_context.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index cc16bcfb539..f5fea1f71b8 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -147,6 +147,7 @@ C_SOURCES = \
 	util/u_vbuf_mgr.c \
 	vl/vl_bitstream_parser.c \
 	vl/vl_mpeg12_mc_renderer.c \
+	vl/vl_mpeg12_context.c \
 	vl/vl_compositor.c \
 	vl/vl_csc.c \
         vl/vl_idct.c \
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
new file mode 100644
index 00000000000..947db1c22be
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -0,0 +1,732 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#include "vl_mpeg12_context.h"
+#include "vl_defines.h"
+#include <pipe/p_shader_tokens.h>
+#include <util/u_inlines.h>
+#include <util/u_memory.h>
+#include <util/u_keymap.h>
+#include <util/u_rect.h>
+#include <util/u_video.h>
+#include <util/u_surface.h>
+
+#define NUM_BUFFERS 2
+
+static void
+flush_buffer(struct vl_mpeg12_context *ctx)
+{
+   unsigned ne_start, ne_num, e_start, e_num;
+   assert(ctx);
+   /* Submit the work queued on the current buffer, then drop it; no-op if none. */
+   if(ctx->cur_buffer != NULL) {
+      /* Unmap the vertex stream and the three IDCT buffers. */
+      vl_vb_unmap(&ctx->cur_buffer->vertex_stream, ctx->pipe);
+      vl_idct_unmap_buffers(&ctx->idct_y, &ctx->cur_buffer->idct_y);
+      vl_idct_unmap_buffers(&ctx->idct_cr, &ctx->cur_buffer->idct_cr);
+      vl_idct_unmap_buffers(&ctx->idct_cb, &ctx->cur_buffer->idct_cb);
+      vl_vb_restart(&ctx->cur_buffer->vertex_stream,
+		    &ne_start, &ne_num, &e_start, &e_num);
+      /* vl_vb_restart receives the addresses of the start/count values used below. */
+      ctx->pipe->set_vertex_buffers(ctx->pipe, 2, ctx->cur_buffer->vertex_bufs.all);
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
+      vl_idct_flush(&ctx->idct_y, &ctx->cur_buffer->idct_y, ne_num);
+      vl_idct_flush(&ctx->idct_cr, &ctx->cur_buffer->idct_cr, ne_num);
+      vl_idct_flush(&ctx->idct_cb, &ctx->cur_buffer->idct_cb, ne_num);
+      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &ctx->cur_buffer->mc,
+                                  ne_start, ne_num, e_start, e_num);
+      /* No buffer is current until rotate_buffer() selects the next one. */
+      ctx->cur_buffer = NULL;
+   }
+}
+
+static void
+rotate_buffer(struct vl_mpeg12_context *ctx)
+{
+   struct pipe_resource *y, *cr, *cb;
+   static unsigned key = 0; /* NOTE: function-static, so shared by all contexts */
+   struct vl_mpeg12_buffer *buffer;
+
+   assert(ctx);
+   /* Flush pending work, then make the next of the NUM_BUFFERS slots current. */
+   flush_buffer(ctx);
+
+   buffer = (struct vl_mpeg12_buffer*)util_keymap_lookup(ctx->buffer_map, &key);
+   if (!buffer) {
+      boolean added_to_map;
+      /* First use of this slot: build the buffer and cache it in the keymap. */
+      buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
+      if (buffer == NULL)
+         return;
+
+      buffer->vertex_bufs.individual.quad.stride = ctx->quads.stride;
+      buffer->vertex_bufs.individual.quad.buffer_offset = ctx->quads.buffer_offset;
+      pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, ctx->quads.buffer);
+      buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, ctx->pipe,
+                                                         ctx->vertex_buffer_size);
+      if (!(y = vl_idct_init_buffer(&ctx->idct_y, &buffer->idct_y)))
+         goto error_vb;
+      if (!(cr = vl_idct_init_buffer(&ctx->idct_cr, &buffer->idct_cr)))
+         goto error_idct_y;
+      if (!(cb = vl_idct_init_buffer(&ctx->idct_cb, &buffer->idct_cb)))
+         goto error_idct_cr;
+      if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc, y, cr, cb))
+         goto error_idct_cb;
+
+      added_to_map = util_keymap_insert(ctx->buffer_map, &key, buffer, ctx);
+      assert(added_to_map);
+   }
+   ++key;
+   key %= NUM_BUFFERS;
+   ctx->cur_buffer = buffer;
+
+   vl_vb_map(&ctx->cur_buffer->vertex_stream, ctx->pipe);
+   vl_idct_map_buffers(&ctx->idct_y, &ctx->cur_buffer->idct_y);
+   vl_idct_map_buffers(&ctx->idct_cr, &ctx->cur_buffer->idct_cr);
+   vl_idct_map_buffers(&ctx->idct_cb, &ctx->cur_buffer->idct_cb);
+   return;
+   /* Setup failed: undo it in reverse order; ctx->cur_buffer stays NULL. */
+error_idct_cb:
+   vl_idct_cleanup_buffer(&ctx->idct_cb, &buffer->idct_cb);
+error_idct_cr:
+   vl_idct_cleanup_buffer(&ctx->idct_cr, &buffer->idct_cr);
+error_idct_y:
+   vl_idct_cleanup_buffer(&ctx->idct_y, &buffer->idct_y);
+error_vb:
+   vl_vb_cleanup(&buffer->vertex_stream);
+   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
+   FREE(buffer);
+}
+
+static void
+delete_buffer(const struct keymap *map,
+              const void *key, void *data,
+              void *user)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)user;
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)data;
+   /* Keymap delete callback: data is a cached buffer, user the owning context. */
+   assert(map);
+   assert(key);
+   assert(data);
+   assert(user);
+   /* NOTE(review): buf and its quad.buffer reference are not released here - confirm. */
+   vl_vb_cleanup(&buf->vertex_stream);
+   vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y);
+   vl_idct_cleanup_buffer(&ctx->idct_cb, &buf->idct_cb);
+   vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr);
+   vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &buf->mc);
+}
+
+static void
+upload_buffer(struct vl_mpeg12_context *ctx,
+              struct vl_mpeg12_buffer *buffer,
+              struct pipe_mpeg12_macroblock *mb)
+{
+   short *blocks;
+   unsigned tb, x, y;
+   /* Queue the coded blocks of one macroblock on the Y, Cb and Cr IDCT buffers. */
+   assert(ctx);
+   assert(buffer);
+   assert(mb);
+   /* mb->blocks is consumed sequentially; it only advances for blocks set in cbp. */
+   blocks = mb->blocks;
+   /* Y plane: 2x2 blocks per macroblock (tb is not used by this loop). */
+   for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+         if (mb->cbp & (*ctx->mc_renderer.empty_block_mask)[0][y][x]) {
+            vl_idct_add_block(&buffer->idct_y, mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
+            blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
+         }
+      }
+   }
+
+   /* TODO: Implement 422, 444 */
+   assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+   /* One block each for Cb (tb == 1) and Cr (tb == 2). */
+   for (tb = 1; tb < 3; ++tb) {
+      if (mb->cbp & (*ctx->mc_renderer.empty_block_mask)[tb][0][0]) {
+         if(tb == 1)
+            vl_idct_add_block(&buffer->idct_cb, mb->mbx, mb->mby, blocks);
+         else
+            vl_idct_add_block(&buffer->idct_cr, mb->mbx, mb->mby, blocks);
+         blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
+      }
+   }
+}
+
+static void
+vl_mpeg12_destroy(struct pipe_video_context *vpipe)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   /* Submit any work still queued before tearing the context down. */
+   flush_buffer(ctx);
+
+   /* Asserted in softpipe_delete_fs_state() for some reason */
+   ctx->pipe->bind_vs_state(ctx->pipe, NULL);
+   ctx->pipe->bind_fs_state(ctx->pipe, NULL);
+
+   ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend);
+   ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast);
+   ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
+   /* Release the sub-objects first, then the wrapped pipe context itself. */
+   pipe_surface_reference(&ctx->decode_target, NULL);
+   vl_compositor_cleanup(&ctx->compositor);
+   util_delete_keymap(ctx->buffer_map, ctx);
+   vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+   vl_idct_cleanup(&ctx->idct_y);
+   vl_idct_cleanup(&ctx->idct_cr);
+   vl_idct_cleanup(&ctx->idct_cb);
+   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
+   pipe_resource_reference(&ctx->quads.buffer, NULL);
+   ctx->pipe->destroy(ctx->pipe);
+
+   FREE(ctx);
+}
+
+static int
+vl_mpeg12_get_param(struct pipe_video_context *vpipe, int param)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   /* Only the two caps below are answered; anything else is logged and yields 0. */
+   switch (param) {
+      case PIPE_CAP_NPOT_TEXTURES:
+         /* XXX: Temporary; not all paths are NPOT-tested */
+#if 0
+         return ctx->pipe->screen->get_param(ctx->pipe->screen, param);
+#endif
+         return FALSE;
+      case PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT:
+         return ctx->decode_format;
+      default:
+      {
+         debug_printf("vl_mpeg12_context: Unknown PIPE_CAP %d\n", param);
+         return 0;
+      }
+   }
+}
+
+static struct pipe_surface *
+vl_mpeg12_create_surface(struct pipe_video_context *vpipe,
+                         struct pipe_resource *resource,
+                         const struct pipe_surface *templat)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   /* Pass-through: the surface is created by the wrapped pipe context. */
+   return ctx->pipe->create_surface(ctx->pipe, resource, templat);
+}
+
+static boolean
+vl_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
+                              enum pipe_format format,
+                              unsigned usage,
+                              unsigned geom)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+
+   /* XXX: Temporary; not all paths are NPOT-tested */
+   if (geom & PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)
+      return FALSE;
+
+   /* Otherwise ask the wrapped screen, always querying as PIPE_TEXTURE_2D. */
+   return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, format, PIPE_TEXTURE_2D,
+                                                 0, usage, geom);
+}
+
+static void
+vl_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
+                             struct pipe_surface *past,
+                             struct pipe_surface *future,
+                             unsigned num_macroblocks,
+                             struct pipe_macroblock *macroblocks,
+                             struct pipe_fence_handle **fence)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+   struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks;
+   unsigned i;
+   /* Queue the macroblocks on the current buffer; nothing is flushed from here. */
+   assert(vpipe);
+   assert(num_macroblocks);
+   assert(macroblocks);
+   assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
+   assert(ctx->decode_target);
+   assert(ctx->cur_buffer);
+   /* Each macroblock goes to the vertex stream and, via upload_buffer(), the IDCT. */
+   for ( i = 0; i < num_macroblocks; ++i ) {
+      vl_vb_add_block(&ctx->cur_buffer->vertex_stream, &mpeg12_macroblocks[i],
+                      ctx->mc_renderer.empty_block_mask);
+      upload_buffer(ctx, ctx->cur_buffer, &mpeg12_macroblocks[i]);
+   }
+   /* Hand the decode target and the past/future surfaces to the MC renderer. */
+   vl_mpeg12_mc_set_surfaces(&ctx->mc_renderer, &ctx->cur_buffer->mc,
+                             ctx->decode_target, past, future, fence);
+}
+
+static void
+vl_mpeg12_clear_render_target(struct pipe_video_context *vpipe,
+                       struct pipe_surface *dst,
+                       unsigned dstx, unsigned dsty,
+                       const float *rgba,
+                       unsigned width, unsigned height)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(dst);
+   /* Use the driver hook when it exists, otherwise the generic util fallback. */
+   if (ctx->pipe->clear_render_target)
+      ctx->pipe->clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
+   else
+      util_clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
+}
+
+static void
+vl_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
+                               struct pipe_resource *dst,
+                               unsigned dstx, unsigned dsty, unsigned dstz,
+                               struct pipe_resource *src,
+                               unsigned srcx, unsigned srcy, unsigned srcz,
+                               unsigned width, unsigned height)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(dst);
+   /* Copy a width x height region from src to dst, via the driver hook if present. */
+   struct pipe_box box;
+   box.x = srcx;
+   box.y = srcy;
+   box.z = srcz;
+   box.width = width;
+   box.height = height;
+   box.depth = 1; /* was left uninitialised; a single slice is copied */
+   if (ctx->pipe->resource_copy_region)
+      ctx->pipe->resource_copy_region(ctx->pipe, dst, 0,
+                                      dstx, dsty, dstz,
+                                      src, 0, &box);
+   else
+      util_resource_copy_region(ctx->pipe, dst, 0,
+                                dstx, dsty, dstz,
+                                src, 0, &box);
+}
+
+static struct pipe_transfer*
+vl_mpeg12_get_transfer(struct pipe_video_context *vpipe,
+                       struct pipe_resource *resource,
+                       unsigned level,
+                       unsigned usage,  /* a combination of PIPE_TRANSFER_x */
+                       const struct pipe_box *box)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(resource);
+   assert(box);
+   /* Pass-through to the wrapped pipe context. */
+   return ctx->pipe->get_transfer(ctx->pipe, resource, level, usage, box);
+}
+
+static void
+vl_mpeg12_transfer_destroy(struct pipe_video_context *vpipe,
+                           struct pipe_transfer *transfer)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(transfer);
+   /* Pass-through to the wrapped pipe context. */
+   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
+}
+
+static void*
+vl_mpeg12_transfer_map(struct pipe_video_context *vpipe,
+                       struct pipe_transfer *transfer)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(transfer);
+   /* Pass-through: returns whatever the wrapped pipe context's transfer_map returns. */
+   return ctx->pipe->transfer_map(ctx->pipe, transfer);
+}
+
+static void
+vl_mpeg12_transfer_flush_region(struct pipe_video_context *vpipe,
+                                struct pipe_transfer *transfer,
+                                const struct pipe_box *box)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(transfer);
+   assert(box);
+   /* Pass-through to the wrapped pipe context. */
+   ctx->pipe->transfer_flush_region(ctx->pipe, transfer, box);
+}
+
+static void
+vl_mpeg12_transfer_unmap(struct pipe_video_context *vpipe,
+                         struct pipe_transfer *transfer)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(transfer);
+   /* Pass-through to the wrapped pipe context. */
+   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
+}
+
+static void
+vl_mpeg12_transfer_inline_write(struct pipe_video_context *vpipe,
+                                struct pipe_resource *resource,
+                                unsigned level,
+                                unsigned usage, /* a combination of PIPE_TRANSFER_x */
+                                const struct pipe_box *box,
+                                const void *data,
+                                unsigned stride,
+                                unsigned slice_stride)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(resource);
+   assert(box);
+   assert(data);
+   assert(ctx->pipe->transfer_inline_write);
+   /* Pass-through; the hook's presence is only asserted, not checked at run time. */
+   ctx->pipe->transfer_inline_write(ctx->pipe, resource, level, usage,
+                                    box, data, stride, slice_stride);
+}
+
+static void
+vl_mpeg12_render_picture(struct pipe_video_context     *vpipe,
+                         struct pipe_surface           *src_surface,
+                         enum pipe_mpeg12_picture_type picture_type,
+                         /*unsigned                    num_past_surfaces,
+                         struct pipe_surface           *past_surfaces,
+                         unsigned                      num_future_surfaces,
+                         struct pipe_surface           *future_surfaces,*/
+                         struct pipe_video_rect        *src_area,
+                         struct pipe_surface           *dst_surface,
+                         struct pipe_video_rect        *dst_area,
+                         struct pipe_fence_handle      **fence)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(src_surface);
+   assert(src_area);
+   assert(dst_surface);
+   assert(dst_area);
+   /* Submit queued decode work first, then let the compositor draw the picture. */
+   flush_buffer(ctx);
+
+   vl_compositor_render(&ctx->compositor, src_surface,
+                        picture_type, src_area, dst_surface, dst_area, fence);
+}
+
+static void
+vl_mpeg12_set_picture_background(struct pipe_video_context *vpipe,
+                                  struct pipe_surface *bg,
+                                  struct pipe_video_rect *bg_src_rect)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(bg);
+   assert(bg_src_rect);
+   /* Pass-through to the compositor owned by this context. */
+   vl_compositor_set_background(&ctx->compositor, bg, bg_src_rect);
+}
+
+static void
+vl_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
+                             struct pipe_surface *layers[],
+                             struct pipe_video_rect *src_rects[],
+                             struct pipe_video_rect *dst_rects[],
+                             unsigned num_layers)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert((layers && src_rects && dst_rects) ||
+          (!layers && !src_rects && !dst_rects));
+   /* The three arrays must be all set or all NULL; forwarded to the compositor. */
+   vl_compositor_set_layers(&ctx->compositor, layers, src_rects, dst_rects, num_layers);
+}
+
+static void
+vl_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
+                            struct pipe_surface *dt)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   assert(dt);
+   /* A new target, or having no current buffer, moves on to the next buffer. */
+   if (ctx->decode_target != dt || ctx->cur_buffer == NULL) {
+      rotate_buffer(ctx);
+      /* NOTE(review): rotate_buffer() can fail and leave cur_buffer NULL. */
+      pipe_surface_reference(&ctx->decode_target, dt);
+   }
+}
+
+static void
+vl_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe, const float *mat)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(vpipe);
+   /* Pass-through to the compositor; mat is forwarded unchecked. */
+   vl_compositor_set_csc_matrix(&ctx->compositor, mat);
+}
+
+static bool
+init_pipe_state(struct vl_mpeg12_context *ctx)
+{
+   struct pipe_rasterizer_state rast;
+   struct pipe_blend_state blend;
+   struct pipe_depth_stencil_alpha_state dsa;
+   unsigned i;
+
+   assert(ctx);
+   /* Create and bind the rasterizer, blend and depth/stencil/alpha state objects. */
+   memset(&rast, 0, sizeof rast);
+   rast.flatshade = 1;
+   rast.flatshade_first = 0;
+   rast.light_twoside = 0;
+   rast.front_ccw = 1;
+   rast.cull_face = PIPE_FACE_NONE;
+   rast.fill_back = PIPE_POLYGON_MODE_FILL;
+   rast.fill_front = PIPE_POLYGON_MODE_FILL;
+   rast.offset_point = 0;
+   rast.offset_line = 0;
+   rast.scissor = 0;
+   rast.poly_smooth = 0;
+   rast.poly_stipple_enable = 0;
+   rast.sprite_coord_enable = 0;
+   rast.point_size_per_vertex = 0; /* NOTE(review): overwritten with 1 further down */
+   rast.multisample = 0;
+   rast.line_smooth = 0;
+   rast.line_stipple_enable = 0;
+   rast.line_stipple_factor = 0;
+   rast.line_stipple_pattern = 0;
+   rast.line_last_pixel = 0;
+   rast.line_width = 1;
+   rast.point_smooth = 0;
+   rast.point_quad_rasterization = 0;
+   rast.point_size_per_vertex = 1;
+   rast.offset_units = 1;
+   rast.offset_scale = 1;
+   rast.gl_rasterization_rules = 1;
+
+   ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
+   ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
+
+   memset(&blend, 0, sizeof blend);
+
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 0;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   /* Needed to allow color writes to FB, even if blending disabled */
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
+   ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
+
+   memset(&dsa, 0, sizeof dsa);
+   dsa.depth.enabled = 0;
+   dsa.depth.writemask = 0;
+   dsa.depth.func = PIPE_FUNC_ALWAYS;
+   for (i = 0; i < 2; ++i) {
+      dsa.stencil[i].enabled = 0;
+      dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
+      dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].valuemask = 0;
+      dsa.stencil[i].writemask = 0;
+   }
+   dsa.alpha.enabled = 0;
+   dsa.alpha.func = PIPE_FUNC_ALWAYS;
+   dsa.alpha.ref_value = 0;
+   ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa);
+   ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
+   /* NOTE(review): the create_*_state results are never checked; always succeeds. */
+   return true;
+}
+
+struct pipe_video_context *
+vl_create_mpeg12_context(struct pipe_context *pipe,
+                         enum pipe_video_profile profile,
+                         enum pipe_video_chroma_format chroma_format,
+                         unsigned width, unsigned height,
+                         bool pot_buffers,
+                         enum pipe_format decode_format)
+{
+   struct pipe_resource *idct_matrix;
+   unsigned buffer_width, buffer_height;
+   unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
+   struct vl_mpeg12_context *ctx;
+
+   assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
+   /* Wraps pipe; once ctx is allocated, any failure below also destroys pipe. */
+   ctx = CALLOC_STRUCT(vl_mpeg12_context);
+
+   if (!ctx)
+      return NULL;
+
+   /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
+   assert(pot_buffers);
+
+   buffer_width = pot_buffers ? util_next_power_of_two(width) : width;
+   buffer_height = pot_buffers ? util_next_power_of_two(height) : height;
+
+   ctx->base.profile = profile;
+   ctx->base.chroma_format = chroma_format;
+   ctx->base.width = width;
+   ctx->base.height = height;
+
+   ctx->base.screen = pipe->screen;
+
+   ctx->base.destroy = vl_mpeg12_destroy;
+   ctx->base.get_param = vl_mpeg12_get_param;
+   ctx->base.is_format_supported = vl_mpeg12_is_format_supported;
+   ctx->base.create_surface = vl_mpeg12_create_surface;
+   ctx->base.decode_macroblocks = vl_mpeg12_decode_macroblocks;
+   ctx->base.render_picture = vl_mpeg12_render_picture;
+   ctx->base.clear_render_target = vl_mpeg12_clear_render_target;
+   ctx->base.resource_copy_region = vl_mpeg12_resource_copy_region;
+   ctx->base.get_transfer = vl_mpeg12_get_transfer;
+   ctx->base.transfer_destroy = vl_mpeg12_transfer_destroy;
+   ctx->base.transfer_map = vl_mpeg12_transfer_map;
+   ctx->base.transfer_flush_region = vl_mpeg12_transfer_flush_region;
+   ctx->base.transfer_unmap = vl_mpeg12_transfer_unmap;
+   if (pipe->transfer_inline_write)
+      ctx->base.transfer_inline_write = vl_mpeg12_transfer_inline_write;
+   ctx->base.set_picture_background = vl_mpeg12_set_picture_background;
+   ctx->base.set_picture_layers = vl_mpeg12_set_picture_layers;
+   ctx->base.set_decode_target = vl_mpeg12_set_decode_target;
+   ctx->base.set_csc_matrix = vl_mpeg12_set_csc_matrix;
+
+   ctx->pipe = pipe;
+   ctx->decode_format = decode_format;
+
+   ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2);
+   ctx->vertex_buffer_size = width / MACROBLOCK_WIDTH * height / MACROBLOCK_HEIGHT;
+   ctx->vertex_elems_state = vl_vb_get_elems_state(ctx->pipe, true);
+
+   if (ctx->vertex_elems_state == NULL)
+      goto error_quads;
+
+   if (!(idct_matrix = vl_idct_upload_matrix(ctx->pipe)))
+      goto error_elems;
+
+   if (!vl_idct_init(&ctx->idct_y, ctx->pipe, buffer_width, buffer_height,
+                     2, 2, TGSI_SWIZZLE_X, idct_matrix))
+      goto error_elems;
+
+   if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      chroma_width = buffer_width / 2;
+      chroma_height = buffer_height / 2;
+      chroma_blocks_x = 1;
+      chroma_blocks_y = 1;
+   } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      chroma_width = buffer_width;
+      chroma_height = buffer_height / 2;
+      chroma_blocks_x = 2;
+      chroma_blocks_y = 1;
+   } else {
+      chroma_width = buffer_width;
+      chroma_height = buffer_height;
+      chroma_blocks_x = 2;
+      chroma_blocks_y = 2;
+   }
+
+   if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height,
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
+      goto error_idct_y;
+
+   if(!vl_idct_init(&ctx->idct_cb, ctx->pipe, chroma_width, chroma_height,
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
+      goto error_idct_cr;
+
+   if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
+                                   buffer_width, buffer_height, chroma_format))
+      goto error_idct_cb;
+
+   ctx->buffer_map = util_new_keymap(sizeof(unsigned), -1, delete_buffer);
+   if (!ctx->buffer_map)
+      goto error_mc;
+
+   if (!vl_compositor_init(&ctx->compositor, ctx->pipe))
+      goto error_map;
+
+   if (!init_pipe_state(ctx))
+      goto error_compositor;
+
+   return &ctx->base;
+
+   /* Unwind in reverse order of construction; every failure path ends here. */
+error_compositor:
+   vl_compositor_cleanup(&ctx->compositor);
+error_map:
+   util_delete_keymap(ctx->buffer_map, ctx);
+error_mc:
+   vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+error_idct_cb:
+   vl_idct_cleanup(&ctx->idct_cb);
+error_idct_cr:
+   vl_idct_cleanup(&ctx->idct_cr);
+error_idct_y:
+   vl_idct_cleanup(&ctx->idct_y);
+error_elems:
+   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
+error_quads:
+   pipe_resource_reference(&ctx->quads.buffer, NULL);
+   ctx->pipe->destroy(ctx->pipe);
+   FREE(ctx);
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
new file mode 100644
index 00000000000..6ad734c9fda
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -0,0 +1,90 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef VL_MPEG12_CONTEXT_H
+#define VL_MPEG12_CONTEXT_H
+
+#include <pipe/p_video_context.h>
+#include "vl_idct.h"
+#include "vl_mpeg12_mc_renderer.h"
+#include "vl_compositor.h"
+
+struct pipe_screen;
+struct pipe_context;
+
+struct vl_mpeg12_context
+{
+   struct pipe_video_context base;     /* first member: base pointers are cast to this struct */
+   struct pipe_context *pipe;          /* wrapped context; destroyed together with this one */
+   struct pipe_surface *decode_target; /* referenced surface, set by set_decode_target */
+
+   struct pipe_vertex_buffer quads;    /* result of vl_vb_upload_quads() */
+   unsigned vertex_buffer_size;        /* width / MACROBLOCK_WIDTH * height / MACROBLOCK_HEIGHT */
+   void *vertex_elems_state;
+
+   struct vl_idct idct_y, idct_cr, idct_cb;
+   struct vl_mpeg12_mc_renderer mc_renderer;
+
+   struct keymap *buffer_map;           /* caches vl_mpeg12_buffer objects by slot number */
+   struct vl_mpeg12_buffer *cur_buffer; /* buffer being filled; NULL after a flush */
+   struct vl_compositor compositor;
+
+   void *rast;  /* rasterizer state object created in init_pipe_state() */
+   void *dsa;   /* depth/stencil/alpha state object */
+   void *blend; /* blend state object */
+
+   enum pipe_format decode_format; /* reported for PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT */
+};
+
+struct vl_mpeg12_buffer
+{
+   struct vl_vertex_buffer vertex_stream; /* receives one entry per queued macroblock */
+
+   union /* two views of the same pair of vertex buffers */
+   {
+      struct pipe_vertex_buffer all[2]; /* array form passed to set_vertex_buffers() */
+      struct {
+         struct pipe_vertex_buffer quad, stream; /* named form used during setup */
+      } individual;
+   } vertex_bufs;
+
+   struct vl_idct_buffer idct_y, idct_cb, idct_cr; /* per-plane IDCT buffers */
+
+   struct vl_mpeg12_mc_buffer mc; /* motion-compensation renderer buffer */
+};
+
+/* drivers can call this function in their pipe_video_context constructors and pass it
+   an accelerated pipe_context along with suitable buffering modes, etc */
+struct pipe_video_context *
+vl_create_mpeg12_context(struct pipe_context *pipe,
+                         enum pipe_video_profile profile,
+                         enum pipe_video_chroma_format chroma_format,
+                         unsigned width, unsigned height,
+                         bool pot_buffers,
+                         enum pipe_format decode_format);
+
+#endif /* VL_MPEG12_CONTEXT_H */
diff --git a/src/gallium/drivers/nv40/nv40_video_context.c b/src/gallium/drivers/nv40/nv40_video_context.c
index e6e57ee787c..35395e848fc 100644
--- a/src/gallium/drivers/nv40/nv40_video_context.c
+++ b/src/gallium/drivers/nv40/nv40_video_context.c
@@ -26,7 +26,8 @@
  **************************************************************************/
 
 #include "nv40_video_context.h"
-#include <softpipe/sp_video_context.h>
+#include "util/u_video.h"
+#include <vl/vl_mpeg12_context.h>
 
 struct pipe_video_context *
 nv40_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
@@ -41,7 +42,17 @@ nv40_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
    if (!pipe)
       return NULL;
 
-   return sp_video_create_ex(pipe, profile, chroma_format, width, height,
-                             VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
-                             true);
+   switch (u_reduce_video_profile(profile)) {
+      case PIPE_VIDEO_CODEC_MPEG12:
+         return vl_create_mpeg12_context(pipe, profile,
+                                         chroma_format,
+                                         width, height,
+                                         true,
+                                         PIPE_FORMAT_XYUV);
+      default:
+         /* Unsupported codec: no video context takes over the pipe_context
+          * obtained above, so destroy it here rather than leak it. */
+         pipe->destroy(pipe);
+         return NULL;
+   }
 }
diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.c b/src/gallium/drivers/nvfx/nvfx_video_context.c
index 58e1c0baa27..0456926c9ad 100644
--- a/src/gallium/drivers/nvfx/nvfx_video_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_video_context.c
@@ -26,7 +26,8 @@
  **************************************************************************/
 
 #include "nvfx_video_context.h"
-#include <softpipe/sp_video_context.h>
+#include "util/u_video.h"
+#include <vl/vl_mpeg12_context.h>
 
 struct pipe_video_context *
 nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
@@ -41,8 +42,17 @@ nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
    if (!pipe)
       return NULL;
 
-   return sp_video_create_ex(pipe, profile, chroma_format, width, height,
-                             VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
-                             true,
-                             PIPE_FORMAT_VUYX);
+   switch (u_reduce_video_profile(profile)) {
+      case PIPE_VIDEO_CODEC_MPEG12:
+         return vl_create_mpeg12_context(pipe, profile,
+                                         chroma_format,
+                                         width, height,
+                                         true,
+                                         PIPE_FORMAT_XYUV);
+      default:
+         /* Unsupported codec: no video context takes over the pipe_context
+          * obtained above, so destroy it here rather than leak it. */
+         pipe->destroy(pipe);
+         return NULL;
+   }
 }
diff --git a/src/gallium/drivers/r600/r600_video_context.c b/src/gallium/drivers/r600/r600_video_context.c
index 8a569cd0c65..0b915d62143 100644
--- a/src/gallium/drivers/r600/r600_video_context.c
+++ b/src/gallium/drivers/r600/r600_video_context.c
@@ -1,5 +1,33 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
 #include "r600_video_context.h"
-#include <softpipe/sp_video_context.h>
+#include "util/u_video.h"
+#include <vl/vl_mpeg12_context.h>
 
 struct pipe_video_context *
 r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
@@ -14,7 +42,17 @@ r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
    if (!pipe)
       return NULL;
 
-   return sp_video_create_ex(pipe, profile, chroma_format, width, height,
-                             true,
-                             PIPE_FORMAT_VUYX);
+   switch (u_reduce_video_profile(profile)) {
+      case PIPE_VIDEO_CODEC_MPEG12:
+         return vl_create_mpeg12_context(pipe, profile,
+                                         chroma_format,
+                                         width, height,
+                                         true,
+                                         PIPE_FORMAT_XYUV);
+      default:
+         /* Unsupported codec: no video context takes over the pipe_context
+          * obtained above, so destroy it here rather than leak it. */
+         pipe->destroy(pipe);
+         return NULL;
+   }
 }
diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile
index 8876bd16398..9403e6cf0b8 100644
--- a/src/gallium/drivers/softpipe/Makefile
+++ b/src/gallium/drivers/softpipe/Makefile
@@ -33,7 +33,6 @@ C_SOURCES = \
 	sp_tex_sample.c \
 	sp_tex_tile_cache.c \
 	sp_tile_cache.c \
-	sp_surface.c \
-	sp_video_context.c
+	sp_surface.c
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index a06817c5735..401e3177b48 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,16 +22,18 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 
 #include "util/u_memory.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
+#include "util/u_video.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 #include "draw/draw_context.h"
+#include "vl/vl_mpeg12_context.h"
 
 #include "state_tracker/sw_winsys.h"
 #include "tgsi/tgsi_exec.h"
@@ -39,7 +41,6 @@
 #include "sp_texture.h"
 #include "sp_screen.h"
 #include "sp_context.h"
-#include "sp_video_context.h"
 #include "sp_fence.h"
 #include "sp_public.h"
 
@@ -286,6 +287,42 @@ softpipe_flush_frontbuffer(struct pipe_screen *_screen,
       winsys->displaytarget_display(winsys, texture->dt, context_private);
 }
 
+/**
+ * Create a video context on top of a freshly created pipe_context.
+ *
+ * Returns NULL if the pipe_context cannot be created or if the profile
+ * does not reduce to PIPE_VIDEO_CODEC_MPEG12, the only codec handled here.
+ */
+static struct pipe_video_context *
+sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
+                enum pipe_video_chroma_format chroma_format,
+                unsigned width, unsigned height, void *priv)
+{
+   struct pipe_context *pipe;
+
+   assert(screen);
+   assert(width && height);
+
+   pipe = screen->context_create(screen, NULL);
+   if (!pipe)
+      return NULL;
+
+   /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture with softpipe */
+   switch (u_reduce_video_profile(profile)) {
+      case PIPE_VIDEO_CODEC_MPEG12:
+         return vl_create_mpeg12_context(pipe, profile,
+                                         chroma_format,
+                                         width, height,
+                                         true,
+                                         PIPE_FORMAT_XYUV);
+      default:
+         /* Unsupported codec: no video context takes over the pipe_context
+          * created above, so destroy it here rather than leak it. */
+         pipe->destroy(pipe);
+         return NULL;
+   }
+}
+
 /**
  * Create a new pipe_screen object
  * Note: we're not presently subclassing pipe_screen (no softpipe_screen).
diff --git a/src/gallium/drivers/softpipe/sp_video_context.c b/src/gallium/drivers/softpipe/sp_video_context.c
deleted file mode 100644
index 7e3519e694f..00000000000
--- a/src/gallium/drivers/softpipe/sp_video_context.c
+++ /dev/null
@@ -1,775 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-
-#include "sp_video_context.h"
-#include <pipe/p_shader_tokens.h>
-#include <util/u_inlines.h>
-#include <util/u_memory.h>
-#include <util/u_keymap.h>
-#include <util/u_rect.h>
-#include <util/u_video.h>
-#include <util/u_surface.h>
-#include <vl/vl_defines.h>
-
-#define NUM_BUFFERS 2
-
-static void
-flush_buffer(struct sp_mpeg12_context *ctx)
-{
-   unsigned ne_start, ne_num, e_start, e_num;
-   assert(ctx);
-
-   if(ctx->cur_buffer != NULL) {
-
-      vl_vb_unmap(&ctx->cur_buffer->vertex_stream, ctx->pipe);
-      vl_idct_unmap_buffers(&ctx->idct_y, &ctx->cur_buffer->idct_y);
-      vl_idct_unmap_buffers(&ctx->idct_cr, &ctx->cur_buffer->idct_cr);
-      vl_idct_unmap_buffers(&ctx->idct_cb, &ctx->cur_buffer->idct_cb);
-      vl_vb_restart(&ctx->cur_buffer->vertex_stream,
-		    &ne_start, &ne_num, &e_start, &e_num);
-
-      ctx->pipe->set_vertex_buffers(ctx->pipe, 2, ctx->cur_buffer->vertex_bufs.all);
-      ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
-      vl_idct_flush(&ctx->idct_y, &ctx->cur_buffer->idct_y, ne_num);
-      vl_idct_flush(&ctx->idct_cr, &ctx->cur_buffer->idct_cr, ne_num);
-      vl_idct_flush(&ctx->idct_cb, &ctx->cur_buffer->idct_cb, ne_num);
-      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &ctx->cur_buffer->mc,
-                                  ne_start, ne_num, e_start, e_num);
-
-      ctx->cur_buffer = NULL;
-   }
-}
-
-static void
-rotate_buffer(struct sp_mpeg12_context *ctx)
-{
-   struct pipe_resource *y, *cr, *cb;
-   static unsigned key = 0;
-   struct sp_mpeg12_buffer *buffer;
-
-   assert(ctx);
-
-   flush_buffer(ctx);
-
-   buffer = (struct sp_mpeg12_buffer*)util_keymap_lookup(ctx->buffer_map, &key);
-   if (!buffer) {
-      boolean added_to_map;
-
-      buffer = CALLOC_STRUCT(sp_mpeg12_buffer);
-      if (buffer == NULL)
-         return;
-
-      buffer->vertex_bufs.individual.quad.stride = ctx->quads.stride;
-      buffer->vertex_bufs.individual.quad.buffer_offset = ctx->quads.buffer_offset;
-      pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, ctx->quads.buffer);
-
-      buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, ctx->pipe,
-                                                         ctx->vertex_buffer_size);
-      if (!(y = vl_idct_init_buffer(&ctx->idct_y, &buffer->idct_y))) {
-         FREE(buffer);
-         return;
-      }
-
-      if (!(cr = vl_idct_init_buffer(&ctx->idct_cr, &buffer->idct_cr))) {
-         FREE(buffer);
-         return;
-      }
-
-      if (!(cb = vl_idct_init_buffer(&ctx->idct_cb, &buffer->idct_cb))) {
-         FREE(buffer);
-         return;
-      }
-
-      if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc, y, cr, cb)) {
-         FREE(buffer);
-         return;
-      }
-
-      added_to_map = util_keymap_insert(ctx->buffer_map, &key, buffer, ctx);
-      assert(added_to_map);
-   }
-   ++key;
-   key %= NUM_BUFFERS;
-   ctx->cur_buffer = buffer;
-
-   vl_vb_map(&ctx->cur_buffer->vertex_stream, ctx->pipe);
-   vl_idct_map_buffers(&ctx->idct_y, &ctx->cur_buffer->idct_y);
-   vl_idct_map_buffers(&ctx->idct_cr, &ctx->cur_buffer->idct_cr);
-   vl_idct_map_buffers(&ctx->idct_cb, &ctx->cur_buffer->idct_cb);
-}
-
-static void
-delete_buffer(const struct keymap *map,
-              const void *key, void *data,
-              void *user)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)user;
-   struct sp_mpeg12_buffer *buf = (struct sp_mpeg12_buffer*)data;
-
-   assert(map);
-   assert(key);
-   assert(data);
-   assert(user);
-
-   vl_vb_cleanup(&buf->vertex_stream);
-   vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y);
-   vl_idct_cleanup_buffer(&ctx->idct_cb, &buf->idct_cb);
-   vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr);
-   vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &buf->mc);
-}
-
-static void
-upload_buffer(struct sp_mpeg12_context *ctx,
-              struct sp_mpeg12_buffer *buffer,
-              struct pipe_mpeg12_macroblock *mb)
-{
-   short *blocks;
-   unsigned tb, x, y;
-
-   assert(ctx);
-   assert(buffer);
-   assert(mb);
-
-   blocks = mb->blocks;
-
-   for (y = 0; y < 2; ++y) {
-      for (x = 0; x < 2; ++x, ++tb) {
-         if (mb->cbp & (*ctx->mc_renderer.empty_block_mask)[0][y][x]) {
-            vl_idct_add_block(&buffer->idct_y, mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
-            blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
-         }
-      }
-   }
-
-   /* TODO: Implement 422, 444 */
-   assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-
-   for (tb = 1; tb < 3; ++tb) {
-      if (mb->cbp & (*ctx->mc_renderer.empty_block_mask)[tb][0][0]) {
-         if(tb == 1)
-            vl_idct_add_block(&buffer->idct_cb, mb->mbx, mb->mby, blocks);
-         else
-            vl_idct_add_block(&buffer->idct_cr, mb->mbx, mb->mby, blocks);
-         blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
-      }
-   }
-}
-
-static void
-sp_mpeg12_destroy(struct pipe_video_context *vpipe)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-
-   flush_buffer(ctx);
-
-   /* Asserted in softpipe_delete_fs_state() for some reason */
-   ctx->pipe->bind_vs_state(ctx->pipe, NULL);
-   ctx->pipe->bind_fs_state(ctx->pipe, NULL);
-
-   ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend);
-   ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast);
-   ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
-
-   pipe_surface_reference(&ctx->decode_target, NULL);
-   vl_compositor_cleanup(&ctx->compositor);
-   util_delete_keymap(ctx->buffer_map, ctx);
-   vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-   vl_idct_cleanup(&ctx->idct_y);
-   vl_idct_cleanup(&ctx->idct_cr);
-   vl_idct_cleanup(&ctx->idct_cb);
-   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
-   pipe_resource_reference(&ctx->quads.buffer, NULL);
-   ctx->pipe->destroy(ctx->pipe);
-
-   FREE(ctx);
-}
-
-static int
-sp_mpeg12_get_param(struct pipe_video_context *vpipe, int param)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-
-   switch (param) {
-      case PIPE_CAP_NPOT_TEXTURES:
-         /* XXX: Temporary; not all paths are NPOT-tested */
-#if 0
-         return ctx->pipe->screen->get_param(ctx->pipe->screen, param);
-#endif
-         return FALSE;
-      case PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT:
-         return ctx->decode_format;
-      default:
-      {
-         debug_printf("Softpipe: Unknown PIPE_CAP %d\n", param);
-         return 0;
-      }
-   }
-}
-
-static struct pipe_surface *
-sp_mpeg12_create_surface(struct pipe_video_context *vpipe,
-                         struct pipe_resource *resource,
-                         const struct pipe_surface *templat)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-
-   return ctx->pipe->create_surface(ctx->pipe, resource, templat);
-}
-
-static boolean
-sp_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
-                              enum pipe_format format,
-                              unsigned usage,
-                              unsigned geom)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-
-   /* XXX: Temporary; not all paths are NPOT-tested */
-   if (geom & PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)
-      return FALSE;
-
-
-   return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, format, PIPE_TEXTURE_2D,
-                                                 0, usage, geom);
-}
-
-static void
-sp_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
-                             struct pipe_surface *past,
-                             struct pipe_surface *future,
-                             unsigned num_macroblocks,
-                             struct pipe_macroblock *macroblocks,
-                             struct pipe_fence_handle **fence)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-   struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks;
-   unsigned i;
-
-   assert(vpipe);
-   assert(num_macroblocks);
-   assert(macroblocks);
-   assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
-   assert(ctx->decode_target);
-   assert(ctx->cur_buffer);
-
-   for ( i = 0; i < num_macroblocks; ++i ) {
-      vl_vb_add_block(&ctx->cur_buffer->vertex_stream, &mpeg12_macroblocks[i],
-                      ctx->mc_renderer.empty_block_mask);
-      upload_buffer(ctx, ctx->cur_buffer, &mpeg12_macroblocks[i]);
-   }
-
-   vl_mpeg12_mc_set_surfaces(&ctx->mc_renderer, &ctx->cur_buffer->mc,
-                             ctx->decode_target, past, future, fence);
-}
-
-static void
-sp_mpeg12_clear_render_target(struct pipe_video_context *vpipe,
-                       struct pipe_surface *dst,
-                       unsigned dstx, unsigned dsty,
-                       unsigned width, unsigned height)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-   float rgba[4] = { 0, 0, 0, 0 };
-
-   assert(vpipe);
-   assert(dst);
-
-   if (ctx->pipe->clear_render_target)
-      ctx->pipe->clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
-   else
-      util_clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
-}
-
-static void
-sp_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
-                       struct pipe_surface *dst,
-                       unsigned dstx, unsigned dsty,
-                       struct pipe_surface *src,
-                       unsigned srcx, unsigned srcy,
-                       unsigned width, unsigned height)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(dst);
-
-   struct pipe_box box;
-   box.x = srcx;
-   box.y = srcy;
-   box.z = 0;
-   box.width = width;
-   box.height = height;
-
-   if (ctx->pipe->resource_copy_region)
-      ctx->pipe->resource_copy_region(ctx->pipe, dst->texture, dst->u.tex.level,
-                                      dstx, dsty, dst->u.tex.first_layer,
-                                      src->texture, src->u.tex.level, &box);
-   else
-      util_resource_copy_region(ctx->pipe, dst->texture, dst->u.tex.level,
-                                dstx, dsty, dst->u.tex.first_layer,
-                                src->texture, src->u.tex.level, &box);
-}
-
-static struct pipe_transfer*
-sp_mpeg12_get_transfer(struct pipe_video_context *vpipe,
-                       struct pipe_resource *resource,
-                       unsigned level,
-                       unsigned usage,  /* a combination of PIPE_TRANSFER_x */
-                       const struct pipe_box *box)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(resource);
-   assert(box);
-
-   return ctx->pipe->get_transfer(ctx->pipe, resource, level, usage, box);
-}
-
-static void
-sp_mpeg12_transfer_destroy(struct pipe_video_context *vpipe,
-                           struct pipe_transfer *transfer)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(transfer);
-
-   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
-}
-
-static void*
-sp_mpeg12_transfer_map(struct pipe_video_context *vpipe,
-                       struct pipe_transfer *transfer)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(transfer);
-
-   return ctx->pipe->transfer_map(ctx->pipe, transfer);
-}
-
-static void
-sp_mpeg12_transfer_flush_region(struct pipe_video_context *vpipe,
-                                struct pipe_transfer *transfer,
-                                const struct pipe_box *box)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(transfer);
-   assert(box);
-
-   ctx->pipe->transfer_flush_region(ctx->pipe, transfer, box);
-}
-
-static void
-sp_mpeg12_transfer_unmap(struct pipe_video_context *vpipe,
-                         struct pipe_transfer *transfer)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(transfer);
-
-   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
-}
-
-static void
-sp_mpeg12_transfer_inline_write(struct pipe_video_context *vpipe,
-                                struct pipe_resource *resource,
-                                unsigned level,
-                                unsigned usage, /* a combination of PIPE_TRANSFER_x */
-                                const struct pipe_box *box,
-                                const void *data,
-                                unsigned stride,
-                                unsigned slice_stride)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(resource);
-   assert(box);
-   assert(data);
-   assert(ctx->pipe->transfer_inline_write);
-
-   ctx->pipe->transfer_inline_write(ctx->pipe, resource, level, usage,
-                                    box, data, stride, slice_stride);
-}
-
-static void
-sp_mpeg12_render_picture(struct pipe_video_context     *vpipe,
-                         struct pipe_surface           *src_surface,
-                         enum pipe_mpeg12_picture_type picture_type,
-                         /*unsigned                    num_past_surfaces,
-                         struct pipe_surface           *past_surfaces,
-                         unsigned                      num_future_surfaces,
-                         struct pipe_surface           *future_surfaces,*/
-                         struct pipe_video_rect        *src_area,
-                         struct pipe_surface           *dst_surface,
-                         struct pipe_video_rect        *dst_area,
-                         struct pipe_fence_handle      **fence)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(src_surface);
-   assert(src_area);
-   assert(dst_surface);
-   assert(dst_area);
-
-   flush_buffer(ctx);
-
-   vl_compositor_render(&ctx->compositor, src_surface,
-                        picture_type, src_area, dst_surface, dst_area, fence);
-}
-
-static void
-sp_mpeg12_set_picture_background(struct pipe_video_context *vpipe,
-                                  struct pipe_surface *bg,
-                                  struct pipe_video_rect *bg_src_rect)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(bg);
-   assert(bg_src_rect);
-
-   vl_compositor_set_background(&ctx->compositor, bg, bg_src_rect);
-}
-
-static void
-sp_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
-                             struct pipe_surface *layers[],
-                             struct pipe_video_rect *src_rects[],
-                             struct pipe_video_rect *dst_rects[],
-                             unsigned num_layers)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert((layers && src_rects && dst_rects) ||
-          (!layers && !src_rects && !dst_rects));
-
-   vl_compositor_set_layers(&ctx->compositor, layers, src_rects, dst_rects, num_layers);
-}
-
-static void
-sp_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
-                            struct pipe_surface *dt)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(dt);
-
-   if (ctx->decode_target != dt || ctx->cur_buffer == NULL) {
-      rotate_buffer(ctx);
-
-      pipe_surface_reference(&ctx->decode_target, dt);
-   }
-}
-
-static void
-sp_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe, const float *mat)
-{
-   struct sp_mpeg12_context *ctx = (struct sp_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-
-   vl_compositor_set_csc_matrix(&ctx->compositor, mat);
-}
-
-static bool
-init_pipe_state(struct sp_mpeg12_context *ctx)
-{
-   struct pipe_rasterizer_state rast;
-   struct pipe_blend_state blend;
-   struct pipe_depth_stencil_alpha_state dsa;
-   unsigned i;
-
-   assert(ctx);
-
-   memset(&rast, 0, sizeof rast);
-   rast.flatshade = 1;
-   rast.flatshade_first = 0;
-   rast.light_twoside = 0;
-   rast.front_ccw = 1;
-   rast.cull_face = PIPE_FACE_NONE;
-   rast.fill_back = PIPE_POLYGON_MODE_FILL;
-   rast.fill_front = PIPE_POLYGON_MODE_FILL;
-   rast.offset_point = 0;
-   rast.offset_line = 0;
-   rast.scissor = 0;
-   rast.poly_smooth = 0;
-   rast.poly_stipple_enable = 0;
-   rast.sprite_coord_enable = 0;
-   rast.point_size_per_vertex = 0;
-   rast.multisample = 0;
-   rast.line_smooth = 0;
-   rast.line_stipple_enable = 0;
-   rast.line_stipple_factor = 0;
-   rast.line_stipple_pattern = 0;
-   rast.line_last_pixel = 0;
-   rast.line_width = 1;
-   rast.point_smooth = 0;
-   rast.point_quad_rasterization = 0;
-   rast.point_size_per_vertex = 1;
-   rast.offset_units = 1;
-   rast.offset_scale = 1;
-   rast.gl_rasterization_rules = 1;
-
-   ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
-   ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
-
-   memset(&blend, 0, sizeof blend);
-
-   blend.independent_blend_enable = 0;
-   blend.rt[0].blend_enable = 0;
-   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
-   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
-   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
-   blend.logicop_enable = 0;
-   blend.logicop_func = PIPE_LOGICOP_CLEAR;
-   /* Needed to allow color writes to FB, even if blending disabled */
-   blend.rt[0].colormask = PIPE_MASK_RGBA;
-   blend.dither = 0;
-   ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
-   ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
-
-   memset(&dsa, 0, sizeof dsa);
-   dsa.depth.enabled = 0;
-   dsa.depth.writemask = 0;
-   dsa.depth.func = PIPE_FUNC_ALWAYS;
-   for (i = 0; i < 2; ++i) {
-      dsa.stencil[i].enabled = 0;
-      dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
-      dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
-      dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
-      dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
-      dsa.stencil[i].valuemask = 0;
-      dsa.stencil[i].writemask = 0;
-   }
-   dsa.alpha.enabled = 0;
-   dsa.alpha.func = PIPE_FUNC_ALWAYS;
-   dsa.alpha.ref_value = 0;
-   ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa);
-   ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
-
-   return true;
-}
-
-static struct pipe_video_context *
-sp_mpeg12_create(struct pipe_context *pipe, enum pipe_video_profile profile,
-                 enum pipe_video_chroma_format chroma_format,
-                 unsigned width, unsigned height,
-                 bool pot_buffers,
-                 enum pipe_format decode_format)
-{
-   struct pipe_resource *idct_matrix;
-   unsigned buffer_width, buffer_height;
-   unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
-   struct sp_mpeg12_context *ctx;
-
-   assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
-
-   ctx = CALLOC_STRUCT(sp_mpeg12_context);
-
-   if (!ctx)
-      return NULL;
-
-   /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
-   assert(pot_buffers);
-
-   buffer_width = pot_buffers ? util_next_power_of_two(width) : width;
-   buffer_height = pot_buffers ? util_next_power_of_two(height) : height;
-
-   ctx->base.profile = profile;
-   ctx->base.chroma_format = chroma_format;
-   ctx->base.width = width;
-   ctx->base.height = height;
-
-   ctx->base.screen = pipe->screen;
-
-   ctx->base.destroy = sp_mpeg12_destroy;
-   ctx->base.get_param = sp_mpeg12_get_param;
-   ctx->base.is_format_supported = sp_mpeg12_is_format_supported;
-   ctx->base.create_surface = sp_mpeg12_create_surface;
-   ctx->base.decode_macroblocks = sp_mpeg12_decode_macroblocks;
-   ctx->base.render_picture = sp_mpeg12_render_picture;
-   ctx->base.clear_render_target = sp_mpeg12_clear_render_target;
-   ctx->base.resource_copy_region = sp_mpeg12_resource_copy_region;
-   ctx->base.get_transfer = sp_mpeg12_get_transfer;
-   ctx->base.transfer_destroy = sp_mpeg12_transfer_destroy;
-   ctx->base.transfer_map = sp_mpeg12_transfer_map;
-   ctx->base.transfer_flush_region = sp_mpeg12_transfer_flush_region;
-   ctx->base.transfer_unmap = sp_mpeg12_transfer_unmap;
-   if (pipe->transfer_inline_write)
-      ctx->base.transfer_inline_write = sp_mpeg12_transfer_inline_write;
-   ctx->base.set_picture_background = sp_mpeg12_set_picture_background;
-   ctx->base.set_picture_layers = sp_mpeg12_set_picture_layers;
-   ctx->base.set_decode_target = sp_mpeg12_set_decode_target;
-   ctx->base.set_csc_matrix = sp_mpeg12_set_csc_matrix;
-
-   ctx->pipe = pipe;
-   ctx->decode_format = decode_format;
-
-   ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2);
-   ctx->vertex_buffer_size = width / MACROBLOCK_WIDTH * height / MACROBLOCK_HEIGHT;
-   ctx->vertex_elems_state = vl_vb_get_elems_state(ctx->pipe, true);
-
-   if (ctx->vertex_elems_state == NULL) {
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
-
-   if (!(idct_matrix = vl_idct_upload_matrix(ctx->pipe)))
-      return false;
-
-   if (!vl_idct_init(&ctx->idct_y, ctx->pipe, buffer_width, buffer_height,
-                     2, 2, TGSI_SWIZZLE_X, idct_matrix))
-      return false;
-
-   if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      chroma_width = buffer_width / 2;
-      chroma_height = buffer_height / 2;
-      chroma_blocks_x = 1;
-      chroma_blocks_y = 1;
-   } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
-      chroma_width = buffer_width;
-      chroma_height = buffer_height / 2;
-      chroma_blocks_x = 2;
-      chroma_blocks_y = 1;
-   } else {
-      chroma_width = buffer_width;
-      chroma_height = buffer_height;
-      chroma_blocks_x = 2;
-      chroma_blocks_y = 2;
-   }
-
-   if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
-      return false;
-
-   if(!vl_idct_init(&ctx->idct_cb, ctx->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
-      return false;
-
-   if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
-                                   buffer_width, buffer_height, chroma_format)) {
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
-
-   ctx->buffer_map = util_new_keymap(sizeof(unsigned), -1, delete_buffer);
-   if (!ctx->buffer_map) {
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
-
-   if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) {
-      util_delete_keymap(ctx->buffer_map, ctx);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
-
-   if (!init_pipe_state(ctx)) {
-      vl_compositor_cleanup(&ctx->compositor);
-      util_delete_keymap(ctx->buffer_map, ctx);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
-
-   return &ctx->base;
-}
-
-struct pipe_video_context *
-sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
-                enum pipe_video_chroma_format chroma_format,
-                unsigned width, unsigned height, void *priv)
-{
-   struct pipe_context *pipe;
-
-   assert(screen);
-   assert(width && height);
-
-   pipe = screen->context_create(screen, NULL);
-   if (!pipe)
-      return NULL;
-
-   /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture with softpipe */
-   return sp_video_create_ex(pipe, profile,
-                             chroma_format,
-                             width, height,
-                             true,
-                             PIPE_FORMAT_XYUV);
-}
-
-struct pipe_video_context *
-sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
-                   enum pipe_video_chroma_format chroma_format,
-                   unsigned width, unsigned height,
-                   bool pot_buffers,
-                   enum pipe_format decode_format)
-{
-   assert(pipe);
-   assert(width && height);
-
-   switch (u_reduce_video_profile(profile)) {
-      case PIPE_VIDEO_CODEC_MPEG12:
-         return sp_mpeg12_create(pipe, profile,
-                                 chroma_format,
-                                 width, height,
-                                 pot_buffers,
-                                 decode_format);
-      default:
-         return NULL;
-   }
-}
diff --git a/src/gallium/drivers/softpipe/sp_video_context.h b/src/gallium/drivers/softpipe/sp_video_context.h
deleted file mode 100644
index 04368a64a53..00000000000
--- a/src/gallium/drivers/softpipe/sp_video_context.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef SP_VIDEO_CONTEXT_H
-#define SP_VIDEO_CONTEXT_H
-
-#include <pipe/p_video_context.h>
-#include <vl/vl_idct.h>
-#include <vl/vl_mpeg12_mc_renderer.h>
-#include <vl/vl_compositor.h>
-
-struct pipe_screen;
-struct pipe_context;
-
-struct sp_mpeg12_context
-{
-   struct pipe_video_context base;
-   struct pipe_context *pipe;
-   struct pipe_surface *decode_target;
-
-   struct pipe_vertex_buffer quads;
-   unsigned vertex_buffer_size;
-   void *vertex_elems_state;
-
-   struct vl_idct idct_y, idct_cr, idct_cb;
-   struct vl_mpeg12_mc_renderer mc_renderer;
-
-   struct keymap *buffer_map;
-   struct sp_mpeg12_buffer *cur_buffer;
-   struct vl_compositor compositor;
-
-   void *rast;
-   void *dsa;
-   void *blend;
-
-   enum pipe_format decode_format;
-};
-
-struct sp_mpeg12_buffer
-{
-   struct vl_vertex_buffer vertex_stream;
-
-   union
-   {
-      struct pipe_vertex_buffer all[2];
-      struct {
-         struct pipe_vertex_buffer quad, stream;
-      } individual;
-   } vertex_bufs;
-
-   struct vl_idct_buffer idct_y, idct_cb, idct_cr;
-
-   struct vl_mpeg12_mc_buffer mc;
-};
-
-struct pipe_video_context *
-sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
-                enum pipe_video_chroma_format chroma_format,
-                unsigned width, unsigned height, void *priv);
-
-/* Other drivers can call this function in their pipe_video_context constructors and pass it
-   an accelerated pipe_context along with suitable buffering modes, etc */
-struct pipe_video_context *
-sp_video_create_ex(struct pipe_context *pipe, enum pipe_video_profile profile,
-                   enum pipe_video_chroma_format chroma_format,
-                   unsigned width, unsigned height,
-                   bool pot_buffers,
-                   enum pipe_format decode_format);
-
-#endif /* SP_VIDEO_CONTEXT_H */
diff --git a/src/gallium/targets/xvmc-r600/Makefile b/src/gallium/targets/xvmc-r600/Makefile
index 62e47b53851..63b3f280fc5 100644
--- a/src/gallium/targets/xvmc-r600/Makefile
+++ b/src/gallium/targets/xvmc-r600/Makefile
@@ -7,7 +7,6 @@ PIPE_DRIVERS = \
         $(TOP)/src/gallium/drivers/r600/libr600.a \
 	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
         $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
-	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
         $(TOP)/src/gallium/drivers/trace/libtrace.a \
 	$(TOP)/src/gallium/auxiliary/libgallium.a
 
-- 
cgit v1.2.3


From 1d72cf6986168a49fbadfa31e9d719ed0222836f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 17 Mar 2011 20:13:51 +0100
Subject: move empty block mask into mpeg12 context

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c     | 16 +++++++++++++---
 src/gallium/auxiliary/vl/vl_mpeg12_context.h     |  1 +
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 10 ----------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  1 -
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 947db1c22be..fb0418c5a47 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -40,6 +40,12 @@
 
 #define NUM_BUFFERS 2
 
+static const unsigned const_empty_block_mask_420[3][2][2] = {
+        { { 0x20, 0x10 },  { 0x08, 0x04 } },
+        { { 0x02, 0x02 },  { 0x02, 0x02 } },
+        { { 0x01, 0x01 },  { 0x01, 0x01 } }
+};
+
 static void
 flush_buffer(struct vl_mpeg12_context *ctx)
 {
@@ -161,7 +167,7 @@ upload_buffer(struct vl_mpeg12_context *ctx,
 
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
-         if (mb->cbp & (*ctx->mc_renderer.empty_block_mask)[0][y][x]) {
+         if (mb->cbp & (*ctx->empty_block_mask)[0][y][x]) {
             vl_idct_add_block(&buffer->idct_y, mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
             blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
          }
@@ -172,7 +178,7 @@ upload_buffer(struct vl_mpeg12_context *ctx,
    assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
    for (tb = 1; tb < 3; ++tb) {
-      if (mb->cbp & (*ctx->mc_renderer.empty_block_mask)[tb][0][0]) {
+      if (mb->cbp & (*ctx->empty_block_mask)[tb][0][0]) {
          if(tb == 1)
             vl_idct_add_block(&buffer->idct_cb, mb->mbx, mb->mby, blocks);
          else
@@ -289,7 +295,7 @@ vl_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
 
    for ( i = 0; i < num_macroblocks; ++i ) {
       vl_vb_add_block(&ctx->cur_buffer->vertex_stream, &mpeg12_macroblocks[i],
-                      ctx->mc_renderer.empty_block_mask);
+                      ctx->empty_block_mask);
       upload_buffer(ctx, ctx->cur_buffer, &mpeg12_macroblocks[i]);
    }
 
@@ -664,6 +670,10 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       return NULL;
    }
 
+   /* TODO: Implement 422, 444 */
+   assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+   ctx->empty_block_mask = &const_empty_block_mask_420;
+
    if (!(idct_matrix = vl_idct_upload_matrix(ctx->pipe)))
       return false;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index 6ad734c9fda..6c964fbe92a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -41,6 +41,7 @@ struct vl_mpeg12_context
    struct pipe_video_context base;
    struct pipe_context *pipe;
    struct pipe_surface *decode_target;
+   const unsigned (*empty_block_mask)[3][2][2];
 
    struct pipe_vertex_buffer quads;
    unsigned vertex_buffer_size;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index df3373ebd88..0e5a21c18b6 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -56,12 +56,6 @@ enum VS_OUTPUT
    VS_O_MV3
 };
 
-static const unsigned const_empty_block_mask_420[3][2][2] = {
-        { { 0x20, 0x10 },  { 0x08, 0x04 } },
-        { { 0x02, 0x02 },  { 0x02, 0x02 } },
-        { { 0x01, 0x01 },  { 0x01, 0x01 } }
-};
-
 static void *
 create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
@@ -537,10 +531,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->buffer_height = buffer_height;
    renderer->chroma_format = chroma_format;
 
-   /* TODO: Implement 422, 444 */
-   assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-   renderer->empty_block_mask = &const_empty_block_mask_420;
-
    renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
                                            texview_map_delete);
    if (!renderer->texview_map)
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 805fec530b0..b761961b219 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -43,7 +43,6 @@ struct vl_mpeg12_mc_renderer
    unsigned buffer_width;
    unsigned buffer_height;
    enum pipe_video_chroma_format chroma_format;
-   const unsigned (*empty_block_mask)[3][2][2];
 
    struct pipe_viewport_state viewport;
    struct pipe_framebuffer_state fb_state;
-- 
cgit v1.2.3


From 0719fdee2ecd8433eb687a2fb38a2cbcb1cea14a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 17 Mar 2011 22:18:05 +0100
Subject: fix chroma swizzle

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index fb0418c5a47..ce1c158e828 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -699,11 +699,11 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    }
 
    if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
       return false;
 
    if(!vl_idct_init(&ctx->idct_cb, ctx->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
       return false;
 
    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
-- 
cgit v1.2.3


From b90f569a0f8578a20b9ee2997396b6c3ddd3b573 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 18 Mar 2011 00:26:34 +0100
Subject: vdpau: merge fixes for vdpau state tracker

---
 src/gallium/state_trackers/vdpau/decode.c  | 119 +++++++++++++++--------------
 src/gallium/state_trackers/vdpau/surface.c |   2 +-
 2 files changed, 62 insertions(+), 59 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 5d3674c5eb2..44f0ecc2622 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -33,37 +33,37 @@
 #include <util/u_debug.h>
 
 VdpStatus
-vlVdpDecoderCreate ( 	VdpDevice device, 
-						VdpDecoderProfile profile, 
-						uint32_t width, uint32_t height, 
-						uint32_t max_references, 
-						VdpDecoder *decoder 
+vlVdpDecoderCreate ( 	VdpDevice device,
+						VdpDecoderProfile profile,
+						uint32_t width, uint32_t height,
+						uint32_t max_references,
+						VdpDecoder *decoder
 )
 {
 	enum pipe_video_profile p_profile = PIPE_VIDEO_PROFILE_UNKNOWN;
 	VdpStatus ret = VDP_STATUS_OK;
 	vlVdpDecoder *vldecoder = NULL;
-	
+
 	debug_printf("[VDPAU] Creating decoder\n");
-	
+
 	if (!decoder)
 		return VDP_STATUS_INVALID_POINTER;
-	
+
 	if (!(width && height))
 		return VDP_STATUS_INVALID_VALUE;
-		
+
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)  {
       ret = VDP_STATUS_INVALID_HANDLE;
       goto inv_device;
    }
-   
+
    vldecoder = CALLOC(1,sizeof(vlVdpDecoder));
    if (!vldecoder)   {
 	   ret = VDP_STATUS_RESOURCES;
 	   goto no_decoder;
    }
-   
+
    p_profile = ProfileToPipe(profile);
    if (p_profile == PIPE_VIDEO_PROFILE_UNKNOWN)	{
 	   ret = VDP_STATUS_INVALID_DECODER_PROFILE;
@@ -71,22 +71,22 @@ vlVdpDecoderCreate ( 	VdpDevice device,
    }
 
 	// TODO: Define max_references. Used mainly for H264
-	
+
 	vldecoder->profile = p_profile;
 	vldecoder->height = height;
 	vldecoder->width = width;
 	vldecoder->device = dev;
 	vldecoder->vctx = NULL;
-		
+
 	*decoder = vlAddDataHTAB(vldecoder);
 	if (*decoder == 0) {
       ret = VDP_STATUS_ERROR;
       goto no_handle;
 	}
 	debug_printf("[VDPAU] Decoder created succesfully\n");
-	
+
 	return VDP_STATUS_OK;
-	
+
 	no_handle:
 	FREE(vldecoder);
 	inv_profile:
@@ -102,23 +102,23 @@ vlVdpDecoderDestroy  (VdpDecoder decoder
 {
 	debug_printf("[VDPAU] Destroying decoder\n");
 	vlVdpDecoder *vldecoder;
-	
+
 	vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
 	if (!vldecoder)  {
       return VDP_STATUS_INVALID_HANDLE;
 	}
-	
+
 	if (vldecoder->vctx)
 	{
 		if (vldecoder->vctx->vscreen)
 			vl_screen_destroy(vldecoder->vctx->vscreen);
 	}
-	
+
 	if (vldecoder->vctx)
 		vl_video_destroy(vldecoder->vctx);
-		
+
 	FREE(vldecoder);
-	
+
 	return VDP_STATUS_OK;
 }
 
@@ -127,17 +127,18 @@ vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
 							vlVdpSurface *vlsurf
 )
 {
+	struct pipe_surface surf_template;
 	struct pipe_resource tmplt;
 	struct pipe_resource *surf_tex;
 	struct pipe_video_context *vctx;
-	
+
 	debug_printf("[VDPAU] Creating surface\n");
-		
+
 	if(!(vldecoder && vlsurf))
 		return VDP_STATUS_INVALID_POINTER;
-		
+
 	vctx = vldecoder->vctx->vpipe;
-		
+
 	memset(&tmplt, 0, sizeof(struct pipe_resource));
 	tmplt.target = PIPE_TEXTURE_2D;
 	tmplt.format = vctx->get_param(vctx,PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT);
@@ -156,23 +157,25 @@ vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
       tmplt.width0 = util_next_power_of_two(vlsurf->width);
       tmplt.height0 = util_next_power_of_two(vlsurf->height);
     }
-	
+
 	tmplt.depth0 = 1;
 	tmplt.usage = PIPE_USAGE_DEFAULT;
 	tmplt.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
 	tmplt.flags = 0;
-	
+
 	surf_tex = vctx->screen->resource_create(vctx->screen, &tmplt);
-	
-	vlsurf->psurface = vctx->screen->get_tex_surface(vctx->screen, surf_tex, 0, 0, 0,
-                                         PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET);
-										 
+
+	memset(&surf_template, 0, sizeof(surf_template));
+	surf_template.format = surf_tex->format;
+	surf_template.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+	vlsurf->psurface = vctx->create_surface(vctx->screen, surf_tex, &surf_template);
+
 	pipe_resource_reference(&surf_tex, NULL);
-	
+
 	if (!vlsurf->psurface)
 		return VDP_STATUS_RESOURCES;
 	debug_printf("[VDPAU] Done creating surface\n");
-	
+
 	return VDP_STATUS_OK;
 }
 
@@ -194,13 +197,13 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 	uint32_t num_macroblocks;
 	struct pipe_mpeg12_macroblock *pipe_macroblocks;
 	VdpStatus ret;
-	
+
 	debug_printf("[VDPAU] Decoding MPEG2\n");
 
 	t_vdp_surf = vlsurf;
-	
+
 	/* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
-	if (picture_info->backward_reference ==  VDP_INVALID_HANDLE) 
+	if (picture_info->backward_reference ==  VDP_INVALID_HANDLE)
 		p_vdp_surf = NULL;
 	else	{
 		p_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference);
@@ -208,17 +211,17 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
 			return VDP_STATUS_INVALID_HANDLE;
 	}
 
-	if (picture_info->forward_reference ==  VDP_INVALID_HANDLE) 
+	if (picture_info->forward_reference ==  VDP_INVALID_HANDLE)
 		f_vdp_surf = NULL;
 	else	{
 		f_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference);
 		if (!f_vdp_surf)
 			return VDP_STATUS_INVALID_HANDLE;
 	}
-		
-	
+
+
 	if (f_vdp_surf ==  VDP_INVALID_HANDLE) f_vdp_surf = NULL;
-	
+
 	ret = vlVdpCreateSurfaceTarget(vldecoder,t_vdp_surf);
 
 	vpipe = vldecoder->vctx->vpipe;
@@ -227,23 +230,23 @@ vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
                      &num_macroblocks, &pipe_macroblocks))
 					 {
 						 debug_printf("[VDPAU] Error in frame-header. Skipping.\n");
-						 
+
 						 ret = VDP_STATUS_OK;
 						 goto skip_frame;
 					 }
-		
+
 	vpipe->set_decode_target(vpipe,t_surf);
 	vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, (struct pipe_macroblock *)pipe_macroblocks, NULL);
-	
+
 	skip_frame:
 	return ret;
 }
 
 VdpStatus
-vlVdpDecoderRender (VdpDecoder decoder, 
-					VdpVideoSurface target, 
-					VdpPictureInfo const *picture_info, 
-					uint32_t bitstream_buffer_count, 
+vlVdpDecoderRender (VdpDecoder decoder,
+					VdpVideoSurface target,
+					VdpPictureInfo const *picture_info,
+					uint32_t bitstream_buffer_count,
 					VdpBitstreamBuffer const *bitstream_buffers
 )
 {
@@ -252,11 +255,11 @@ vlVdpDecoderRender (VdpDecoder decoder,
 	struct vl_screen *vscreen;
 	VdpStatus ret;
 	debug_printf("[VDPAU] Decoding\n");
-		
+
 	if (!(picture_info && bitstream_buffers))
 		return VDP_STATUS_INVALID_POINTER;
-	
-	
+
+
 	vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
 	if (!vldecoder)
 		return VDP_STATUS_INVALID_HANDLE;
@@ -264,22 +267,22 @@ vlVdpDecoderRender (VdpDecoder decoder,
 	vlsurf = (vlVdpSurface *)vlGetDataHTAB(target);
 	if (!vlsurf)
 		return VDP_STATUS_INVALID_HANDLE;
-	
+
 	if (vlsurf->device != vldecoder->device)
 		return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
-		
+
 	/* Test doesn't make sence */
 	/*if (vlsurf->chroma_format != vldecoder->chroma_format)
 		return VDP_STATUS_INVALID_CHROMA_TYPE;*/
-		
+
 	vscreen = vl_screen_create(vldecoder->device->display, vldecoder->device->screen);
 	if (!vscreen)
 		return VDP_STATUS_RESOURCES;
-	
+
 	vldecoder->vctx = vl_video_create(vscreen, vldecoder->profile, vlsurf->chroma_format, vldecoder->width, vldecoder->height);
 	if (!vldecoder->vctx)
 		return VDP_STATUS_RESOURCES;
-		
+
     // TODO: Right now only mpeg2 is supported.
 	switch (vldecoder->vctx->vpipe->profile)   {
 		case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
@@ -295,15 +298,15 @@ vlVdpDecoderRender (VdpDecoder decoder,
 	return ret;
 }
 
-VdpStatus 
+VdpStatus
 vlVdpGenerateCSCMatrix(
-	VdpProcamp *procamp, 
+	VdpProcamp *procamp,
 	VdpColorStandard standard,
 	VdpCSCMatrix *csc_matrix)
 {
 	debug_printf("[VDPAU] Generating CSCMatrix\n");
 	if (!(csc_matrix && procamp))
 		return VDP_STATUS_INVALID_POINTER;
-		
+
 	return VDP_STATUS_OK;
-}
\ No newline at end of file
+}
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index 9b6dac9c3f4..0ebfd12d988 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -104,7 +104,7 @@ vlVdpVideoSurfaceDestroy ( VdpVideoSurface surface )
  if (p_surf->psurface) {
  if (p_surf->psurface->texture) {
  if (p_surf->psurface->texture->screen)
- p_surf->psurface->texture->screen->tex_surface_destroy(p_surf->psurface);
+ p_surf->psurface->context->surface_destroy(p_surf->psurface->context, p_surf->psurface);
  }
  }
  FREE(p_surf);
-- 
cgit v1.2.3


From 49f4aff75ce781fb71383a5ffe44e51e34ff1bf3 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 18 Mar 2011 01:06:41 +0100
Subject: vdpau: make indention and formating more sane

---
 src/gallium/state_trackers/vdpau/bitmap.c          |  57 ++-
 src/gallium/state_trackers/vdpau/decode.c          | 405 ++++++++++-----------
 src/gallium/state_trackers/vdpau/device.c          | 115 +++---
 src/gallium/state_trackers/vdpau/mixer.c           | 185 +++++-----
 .../state_trackers/vdpau/mpeg2_bitstream_parser.c  | 199 +++++-----
 .../state_trackers/vdpau/mpeg2_bitstream_parser.h  |  36 +-
 src/gallium/state_trackers/vdpau/output.c          |  50 +--
 src/gallium/state_trackers/vdpau/preemption.c      |  18 +-
 src/gallium/state_trackers/vdpau/presentation.c    | 132 ++++---
 src/gallium/state_trackers/vdpau/query.c           |  60 +--
 src/gallium/state_trackers/vdpau/vdpau_private.h   |  16 +-
 11 files changed, 615 insertions(+), 658 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/bitmap.c b/src/gallium/state_trackers/vdpau/bitmap.c
index f1a9d9a6828..b570f2752e4 100644
--- a/src/gallium/state_trackers/vdpau/bitmap.c
+++ b/src/gallium/state_trackers/vdpau/bitmap.c
@@ -30,46 +30,45 @@
 #include <util/u_debug.h>
 
 VdpStatus
-vlVdpBitmapSurfaceCreate(	VdpDevice device, 
-							VdpRGBAFormat rgba_format, 
-							uint32_t width, uint32_t height, 
-							VdpBool frequently_accessed, 
-							VdpBitmapSurface *surface)
+vlVdpBitmapSurfaceCreate(VdpDevice device,
+                         VdpRGBAFormat rgba_format,
+                         uint32_t width, uint32_t height,
+                         VdpBool frequently_accessed,
+                         VdpBitmapSurface *surface)
 {
-	debug_printf("[VDPAU] Creating a bitmap surface\n");
-	if (!surface)
-		return VDP_STATUS_INVALID_POINTER;
+   debug_printf("[VDPAU] Creating a bitmap surface\n");
+   if (!surface)
+      return VDP_STATUS_INVALID_POINTER;
 
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpBitmapSurfaceDestroy ( VdpBitmapSurface  surface )
+vlVdpBitmapSurfaceDestroy(VdpBitmapSurface surface)
 {
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpBitmapSurfaceGetParameters ( 	VdpBitmapSurface  surface,
-									VdpRGBAFormat  *rgba_format, 
-									uint32_t *width, uint32_t *height, 
-									VdpBool  *frequently_accessed)
+vlVdpBitmapSurfaceGetParameters(VdpBitmapSurface surface,
+                                VdpRGBAFormat *rgba_format,
+                                uint32_t *width, uint32_t *height,
+                                VdpBool *frequently_accessed)
 {
-	if (!(rgba_format && width && height && frequently_accessed))
-		return VDP_STATUS_INVALID_POINTER;
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   if (!(rgba_format && width && height && frequently_accessed))
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpBitmapSurfacePutBitsNative ( 	VdpBitmapSurface  surface, 
-									void const *const *source_data, 
-									uint32_t const *source_pitches, 
-									VdpRect  const *destination_rect )
+vlVdpBitmapSurfacePutBitsNative(VdpBitmapSurface surface,
+                                void const *const *source_data,
+                                uint32_t const *source_pitches,
+                                VdpRect const *destination_rect )
 {
-	if (!(source_data && source_pitches && destination_rect))
-		return VDP_STATUS_INVALID_POINTER;
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
-}
\ No newline at end of file
+   if (!(source_data && source_pitches && destination_rect))
+       return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 44f0ecc2622..8bc54d7fc7d 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -33,280 +33,271 @@
 #include <util/u_debug.h>
 
 VdpStatus
-vlVdpDecoderCreate ( 	VdpDevice device,
-						VdpDecoderProfile profile,
-						uint32_t width, uint32_t height,
-						uint32_t max_references,
-						VdpDecoder *decoder
-)
+vlVdpDecoderCreate(VdpDevice device,
+                   VdpDecoderProfile profile,
+                   uint32_t width, uint32_t height,
+                   uint32_t max_references,
+                   VdpDecoder *decoder)
 {
-	enum pipe_video_profile p_profile = PIPE_VIDEO_PROFILE_UNKNOWN;
-	VdpStatus ret = VDP_STATUS_OK;
-	vlVdpDecoder *vldecoder = NULL;
+   enum pipe_video_profile p_profile = PIPE_VIDEO_PROFILE_UNKNOWN;
+   VdpStatus ret = VDP_STATUS_OK;
+   vlVdpDecoder *vldecoder = NULL;
 
-	debug_printf("[VDPAU] Creating decoder\n");
+   debug_printf("[VDPAU] Creating decoder\n");
 
-	if (!decoder)
-		return VDP_STATUS_INVALID_POINTER;
+   if (!decoder)
+      return VDP_STATUS_INVALID_POINTER;
 
-	if (!(width && height))
-		return VDP_STATUS_INVALID_VALUE;
+   if (!(width && height))
+      return VDP_STATUS_INVALID_VALUE;
 
    vlVdpDevice *dev = vlGetDataHTAB(device);
-   if (!dev)  {
+   if (!dev) {
       ret = VDP_STATUS_INVALID_HANDLE;
       goto inv_device;
    }
 
    vldecoder = CALLOC(1,sizeof(vlVdpDecoder));
-   if (!vldecoder)   {
-	   ret = VDP_STATUS_RESOURCES;
-	   goto no_decoder;
+   if (!vldecoder) {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_decoder;
    }
 
    p_profile = ProfileToPipe(profile);
    if (p_profile == PIPE_VIDEO_PROFILE_UNKNOWN)	{
-	   ret = VDP_STATUS_INVALID_DECODER_PROFILE;
-	   goto inv_profile;
+      ret = VDP_STATUS_INVALID_DECODER_PROFILE;
+      goto inv_profile;
    }
 
-	// TODO: Define max_references. Used mainly for H264
+   // TODO: Define max_references. Used mainly for H264
 
-	vldecoder->profile = p_profile;
-	vldecoder->height = height;
-	vldecoder->width = width;
-	vldecoder->device = dev;
-	vldecoder->vctx = NULL;
+   vldecoder->profile = p_profile;
+   vldecoder->height = height;
+   vldecoder->width = width;
+   vldecoder->device = dev;
+   vldecoder->vctx = NULL;
 
-	*decoder = vlAddDataHTAB(vldecoder);
-	if (*decoder == 0) {
+   *decoder = vlAddDataHTAB(vldecoder);
+   if (*decoder == 0) {
       ret = VDP_STATUS_ERROR;
       goto no_handle;
-	}
-	debug_printf("[VDPAU] Decoder created succesfully\n");
+   }
+   debug_printf("[VDPAU] Decoder created succesfully\n");
 
-	return VDP_STATUS_OK;
+   return VDP_STATUS_OK;
 
-	no_handle:
-	FREE(vldecoder);
-	inv_profile:
-	no_screen:
-	no_decoder:
-	inv_device:
+no_handle:
+   FREE(vldecoder);
+   inv_profile:
+no_screen:
+no_decoder:
+inv_device:
     return ret;
 }
 
 VdpStatus
-vlVdpDecoderDestroy  (VdpDecoder decoder
-)
+vlVdpDecoderDestroy(VdpDecoder decoder)
 {
-	debug_printf("[VDPAU] Destroying decoder\n");
-	vlVdpDecoder *vldecoder;
+   debug_printf("[VDPAU] Destroying decoder\n");
+   vlVdpDecoder *vldecoder;
 
-	vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
-	if (!vldecoder)  {
+   vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
+   if (!vldecoder) {
       return VDP_STATUS_INVALID_HANDLE;
-	}
+   }
 
-	if (vldecoder->vctx)
-	{
-		if (vldecoder->vctx->vscreen)
-			vl_screen_destroy(vldecoder->vctx->vscreen);
-	}
+   if (vldecoder->vctx) {
+      if (vldecoder->vctx->vscreen)
+         vl_screen_destroy(vldecoder->vctx->vscreen);
+   }
 
-	if (vldecoder->vctx)
-		vl_video_destroy(vldecoder->vctx);
+   if (vldecoder->vctx)
+       vl_video_destroy(vldecoder->vctx);
 
-	FREE(vldecoder);
+   FREE(vldecoder);
 
-	return VDP_STATUS_OK;
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
-vlVdpCreateSurfaceTarget   (vlVdpDecoder *vldecoder,
-							vlVdpSurface *vlsurf
-)
+vlVdpCreateSurfaceTarget(vlVdpDecoder *vldecoder, vlVdpSurface *vlsurf)
 {
-	struct pipe_surface surf_template;
-	struct pipe_resource tmplt;
-	struct pipe_resource *surf_tex;
-	struct pipe_video_context *vctx;
+   struct pipe_surface surf_template;
+   struct pipe_resource tmplt;
+   struct pipe_resource *surf_tex;
+   struct pipe_video_context *vctx;
 
-	debug_printf("[VDPAU] Creating surface\n");
+   debug_printf("[VDPAU] Creating surface\n");
 
-	if(!(vldecoder && vlsurf))
-		return VDP_STATUS_INVALID_POINTER;
+   if(!(vldecoder && vlsurf))
+      return VDP_STATUS_INVALID_POINTER;
 
-	vctx = vldecoder->vctx->vpipe;
+   vctx = vldecoder->vctx->vpipe;
 
-	memset(&tmplt, 0, sizeof(struct pipe_resource));
-	tmplt.target = PIPE_TEXTURE_2D;
-	tmplt.format = vctx->get_param(vctx,PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT);
-	tmplt.last_level = 0;
+   memset(&tmplt, 0, sizeof(struct pipe_resource));
+   tmplt.target = PIPE_TEXTURE_2D;
+   tmplt.format = vctx->get_param(vctx,PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT);
+   tmplt.last_level = 0;
 
-	if (vctx->is_format_supported(vctx, tmplt.format,
-                                  PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET,
-                                  PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)) {
+   if (vctx->is_format_supported(vctx, tmplt.format,
+                                 PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET,
+                                 PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO)) {
       tmplt.width0 = vlsurf->width;
       tmplt.height0 = vlsurf->height;
-    }
-    else {
+   } else {
       assert(vctx->is_format_supported(vctx, tmplt.format,
                                        PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET,
                                        PIPE_TEXTURE_GEOM_NON_SQUARE));
       tmplt.width0 = util_next_power_of_two(vlsurf->width);
       tmplt.height0 = util_next_power_of_two(vlsurf->height);
-    }
+   }
 
-	tmplt.depth0 = 1;
-	tmplt.usage = PIPE_USAGE_DEFAULT;
-	tmplt.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-	tmplt.flags = 0;
+   tmplt.depth0 = 1;
+   tmplt.usage = PIPE_USAGE_DEFAULT;
+   tmplt.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   tmplt.flags = 0;
 
-	surf_tex = vctx->screen->resource_create(vctx->screen, &tmplt);
+   surf_tex = vctx->screen->resource_create(vctx->screen, &tmplt);
 
-	memset(&surf_template, 0, sizeof(surf_template));
-	surf_template.format = surf_tex->format;
-	surf_template.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-	vlsurf->psurface = vctx->create_surface(vctx->screen, surf_tex, &surf_template);
+   memset(&surf_template, 0, sizeof(surf_template));
+   surf_template.format = surf_tex->format;
+   surf_template.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   vlsurf->psurface = vctx->create_surface(vctx->screen, surf_tex, &surf_template);
 
-	pipe_resource_reference(&surf_tex, NULL);
+   pipe_resource_reference(&surf_tex, NULL);
 
-	if (!vlsurf->psurface)
-		return VDP_STATUS_RESOURCES;
-	debug_printf("[VDPAU] Done creating surface\n");
+   if (!vlsurf->psurface)
+      return VDP_STATUS_RESOURCES;
+   debug_printf("[VDPAU] Done creating surface\n");
 
-	return VDP_STATUS_OK;
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
-vlVdpDecoderRenderMpeg2    (vlVdpDecoder *vldecoder,
-							vlVdpSurface *vlsurf,
-							VdpPictureInfoMPEG1Or2 *picture_info,
-							uint32_t bitstream_buffer_count,
-							VdpBitstreamBuffer const *bitstream_buffers
-							)
+vlVdpDecoderRenderMpeg2(vlVdpDecoder *vldecoder,
+                        vlVdpSurface *vlsurf,
+                        VdpPictureInfoMPEG1Or2 *picture_info,
+                        uint32_t bitstream_buffer_count,
+                        VdpBitstreamBuffer const *bitstream_buffers)
 {
-	struct pipe_video_context *vpipe;
-	vlVdpSurface *t_vdp_surf;
-	vlVdpSurface *p_vdp_surf;
-	vlVdpSurface *f_vdp_surf;
-	struct pipe_surface *t_surf;
-	struct pipe_surface *p_surf;
-	struct pipe_surface *f_surf;
-	uint32_t num_macroblocks;
-	struct pipe_mpeg12_macroblock *pipe_macroblocks;
-	VdpStatus ret;
-
-	debug_printf("[VDPAU] Decoding MPEG2\n");
-
-	t_vdp_surf = vlsurf;
-
-	/* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
-	if (picture_info->backward_reference ==  VDP_INVALID_HANDLE)
-		p_vdp_surf = NULL;
-	else	{
-		p_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference);
-		if (!p_vdp_surf)
-			return VDP_STATUS_INVALID_HANDLE;
-	}
-
-	if (picture_info->forward_reference ==  VDP_INVALID_HANDLE)
-		f_vdp_surf = NULL;
-	else	{
-		f_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference);
-		if (!f_vdp_surf)
-			return VDP_STATUS_INVALID_HANDLE;
-	}
-
-
-	if (f_vdp_surf ==  VDP_INVALID_HANDLE) f_vdp_surf = NULL;
-
-	ret = vlVdpCreateSurfaceTarget(vldecoder,t_vdp_surf);
-
-	vpipe = vldecoder->vctx->vpipe;
-
-	if (vlVdpMPEG2BitstreamToMacroblock(vpipe->screen, bitstream_buffers, bitstream_buffer_count,
-                     &num_macroblocks, &pipe_macroblocks))
-					 {
-						 debug_printf("[VDPAU] Error in frame-header. Skipping.\n");
-
-						 ret = VDP_STATUS_OK;
-						 goto skip_frame;
-					 }
-
-	vpipe->set_decode_target(vpipe,t_surf);
-	vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks, (struct pipe_macroblock *)pipe_macroblocks, NULL);
-
-	skip_frame:
-	return ret;
+   struct pipe_video_context *vpipe;
+   vlVdpSurface *t_vdp_surf;
+   vlVdpSurface *p_vdp_surf;
+   vlVdpSurface *f_vdp_surf;
+   struct pipe_surface *t_surf;
+   struct pipe_surface *p_surf;
+   struct pipe_surface *f_surf;
+   uint32_t num_macroblocks;
+   struct pipe_mpeg12_macroblock *pipe_macroblocks;
+   VdpStatus ret;
+
+   debug_printf("[VDPAU] Decoding MPEG2\n");
+
+   t_vdp_surf = vlsurf;
+
+   /* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
+   if (picture_info->backward_reference ==  VDP_INVALID_HANDLE)
+      p_vdp_surf = NULL;
+   else {
+      p_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference);
+      if (!p_vdp_surf)
+         return VDP_STATUS_INVALID_HANDLE;
+   }
+
+   if (picture_info->forward_reference ==  VDP_INVALID_HANDLE)
+      f_vdp_surf = NULL;
+   else {
+      f_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference);
+      if (!f_vdp_surf)
+         return VDP_STATUS_INVALID_HANDLE;
+   }
+
+   if (f_vdp_surf ==  VDP_INVALID_HANDLE) f_vdp_surf = NULL;
+
+   ret = vlVdpCreateSurfaceTarget(vldecoder,t_vdp_surf);
+
+   vpipe = vldecoder->vctx->vpipe;
+
+   if (vlVdpMPEG2BitstreamToMacroblock(vpipe->screen, bitstream_buffers, bitstream_buffer_count,
+                                       &num_macroblocks, &pipe_macroblocks))
+   {
+      debug_printf("[VDPAU] Error in frame-header. Skipping.\n");
+
+      ret = VDP_STATUS_OK;
+      goto skip_frame;
+   }
+
+   vpipe->set_decode_target(vpipe,t_surf);
+   vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks,
+                             (struct pipe_macroblock *)pipe_macroblocks, NULL);
+
+   skip_frame:
+   return ret;
 }
 
 VdpStatus
-vlVdpDecoderRender (VdpDecoder decoder,
-					VdpVideoSurface target,
-					VdpPictureInfo const *picture_info,
-					uint32_t bitstream_buffer_count,
-					VdpBitstreamBuffer const *bitstream_buffers
-)
+vlVdpDecoderRender(VdpDecoder decoder,
+                   VdpVideoSurface target,
+                   VdpPictureInfo const *picture_info,
+                   uint32_t bitstream_buffer_count,
+                   VdpBitstreamBuffer const *bitstream_buffers)
 {
-	vlVdpDecoder *vldecoder;
-	vlVdpSurface *vlsurf;
-	struct vl_screen *vscreen;
-	VdpStatus ret;
-	debug_printf("[VDPAU] Decoding\n");
-
-	if (!(picture_info && bitstream_buffers))
-		return VDP_STATUS_INVALID_POINTER;
-
-
-	vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
-	if (!vldecoder)
-		return VDP_STATUS_INVALID_HANDLE;
-
-	vlsurf = (vlVdpSurface *)vlGetDataHTAB(target);
-	if (!vlsurf)
-		return VDP_STATUS_INVALID_HANDLE;
-
-	if (vlsurf->device != vldecoder->device)
-		return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
-
-	/* Test doesn't make sence */
-	/*if (vlsurf->chroma_format != vldecoder->chroma_format)
-		return VDP_STATUS_INVALID_CHROMA_TYPE;*/
-
-	vscreen = vl_screen_create(vldecoder->device->display, vldecoder->device->screen);
-	if (!vscreen)
-		return VDP_STATUS_RESOURCES;
-
-	vldecoder->vctx = vl_video_create(vscreen, vldecoder->profile, vlsurf->chroma_format, vldecoder->width, vldecoder->height);
-	if (!vldecoder->vctx)
-		return VDP_STATUS_RESOURCES;
-
-    // TODO: Right now only mpeg2 is supported.
-	switch (vldecoder->vctx->vpipe->profile)   {
-		case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
-		case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
-			ret = vlVdpDecoderRenderMpeg2(vldecoder,vlsurf,(VdpPictureInfoMPEG1Or2 *)picture_info,
-											bitstream_buffer_count,bitstream_buffers);
-			break;
-		default:
-			return VDP_STATUS_INVALID_DECODER_PROFILE;
-	}
-	assert(0);
-
-	return ret;
+   vlVdpDecoder *vldecoder;
+   vlVdpSurface *vlsurf;
+   struct vl_screen *vscreen;
+   VdpStatus ret;
+
+   debug_printf("[VDPAU] Decoding\n");
+
+   if (!(picture_info && bitstream_buffers))
+      return VDP_STATUS_INVALID_POINTER;
+
+   vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
+   if (!vldecoder)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   vlsurf = (vlVdpSurface *)vlGetDataHTAB(target);
+   if (!vlsurf)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   if (vlsurf->device != vldecoder->device)
+      return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
+
+   /* Test doesn't make sense */
+   /*if (vlsurf->chroma_format != vldecoder->chroma_format)
+   return VDP_STATUS_INVALID_CHROMA_TYPE;*/
+
+   vscreen = vl_screen_create(vldecoder->device->display, vldecoder->device->screen);
+   if (!vscreen)
+      return VDP_STATUS_RESOURCES;
+
+   vldecoder->vctx = vl_video_create(vscreen, vldecoder->profile, vlsurf->chroma_format, vldecoder->width, vldecoder->height);
+   if (!vldecoder->vctx)
+      return VDP_STATUS_RESOURCES;
+
+   // TODO: Right now only mpeg2 is supported.
+   switch (vldecoder->vctx->vpipe->profile)   {
+   case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
+   case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+      ret = vlVdpDecoderRenderMpeg2(vldecoder,vlsurf,(VdpPictureInfoMPEG1Or2 *)picture_info,
+                                    bitstream_buffer_count,bitstream_buffers);
+      break;
+   default:
+      return VDP_STATUS_INVALID_DECODER_PROFILE;
+   }
+   assert(0);
+
+   return ret;
 }
 
 VdpStatus
-vlVdpGenerateCSCMatrix(
-	VdpProcamp *procamp,
-	VdpColorStandard standard,
-	VdpCSCMatrix *csc_matrix)
+vlVdpGenerateCSCMatrix(VdpProcamp *procamp,
+                       VdpColorStandard standard,
+                       VdpCSCMatrix *csc_matrix)
 {
-	debug_printf("[VDPAU] Generating CSCMatrix\n");
-	if (!(csc_matrix && procamp))
-		return VDP_STATUS_INVALID_POINTER;
+   debug_printf("[VDPAU] Generating CSCMatrix\n");
+   if (!(csc_matrix && procamp))
+      return VDP_STATUS_INVALID_POINTER;
 
-	return VDP_STATUS_OK;
+   return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 496e2b8def0..0f9b7b6f5d8 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -34,9 +34,10 @@
 
 
 PUBLIC VdpStatus
-vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGetProcAddress **get_proc_address)
+vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
+                          VdpGetProcAddress **get_proc_address)
 {
-   VdpStatus    ret;
+   VdpStatus ret;
    vlVdpDevice *dev = NULL;
 
    if (!(display && device && get_proc_address))
@@ -67,7 +68,7 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device, VdpGe
       ret = VDP_STATUS_ERROR;
       goto no_handle;
    }
-	
+
    *get_proc_address = &vlVdpGetProcAddress;
    debug_printf("[VDPAU] Device created succesfully\n");
 
@@ -83,17 +84,18 @@ no_htab:
    return ret;
 }
 
-PUBLIC VdpStatus 
-vlVdpPresentationQueueTargetCreateX11(VdpDevice device, Drawable drawable,VdpPresentationQueueTarget *target)
+PUBLIC VdpStatus
+vlVdpPresentationQueueTargetCreateX11(VdpDevice device, Drawable drawable,
+                                      VdpPresentationQueueTarget *target)
 {
    VdpStatus    ret;
    vlVdpPresentationQueueTarget *pqt = NULL;
-   
+
    debug_printf("[VDPAU] Creating PresentationQueueTarget\n");
 
    if (!drawable)
       return VDP_STATUS_INVALID_HANDLE;
-	  
+
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
@@ -101,10 +103,10 @@ vlVdpPresentationQueueTargetCreateX11(VdpDevice device, Drawable drawable,VdpPre
    pqt = CALLOC(1, sizeof(vlVdpPresentationQueue));
    if (!pqt)
       return VDP_STATUS_RESOURCES;
-   
+
    pqt->device = dev;
    pqt->drawable = drawable;
-	  
+
 	*target = vlAddDataHTAB(pqt);
    if (*target == 0) {
       ret = VDP_STATUS_ERROR;
@@ -112,17 +114,17 @@ vlVdpPresentationQueueTargetCreateX11(VdpDevice device, Drawable drawable,VdpPre
    }
 
 
-	return VDP_STATUS_OK;
-    no_handle:
-    FREE(dev);
-	return ret;
+   return VDP_STATUS_OK;
+no_handle:
+   FREE(pqt); /* release the target allocated above; dev was only looked up, not allocated here */
+   return ret;
 }
 
-VdpStatus 
+VdpStatus
 vlVdpDeviceDestroy(VdpDevice device)
 {
    debug_printf("[VDPAU] Destroying destroy\n");
-	
+
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
@@ -134,7 +136,7 @@ vlVdpDeviceDestroy(VdpDevice device)
    return VDP_STATUS_OK;
 }
 
-VdpStatus 
+VdpStatus
 vlVdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_pointer)
 {
    vlVdpDevice *dev = vlGetDataHTAB(device);
@@ -150,49 +152,44 @@ vlVdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_poi
    return VDP_STATUS_OK;
 }
 
-#define _ERROR_TYPE(TYPE,STRING) \
-	case TYPE:	\
-		return STRING; \
-	break
+#define _ERROR_TYPE(TYPE,STRING) case TYPE: return STRING;
 
-char const * 
-vlVdpGetErrorString (
-VdpStatus status)
+char const *
+vlVdpGetErrorString (VdpStatus status)
 {
-	switch (status)
-	{
-		_ERROR_TYPE(VDP_STATUS_OK,"The operation completed successfully; no error.");
-		_ERROR_TYPE(VDP_STATUS_NO_IMPLEMENTATION,"No backend implementation could be loaded.");
-		_ERROR_TYPE(VDP_STATUS_DISPLAY_PREEMPTED,"The display was preempted, or a fatal error occurred. The application must re-initialize VDPAU.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_HANDLE,"An invalid handle value was provided. Either the handle does not exist at all, or refers to an object of an incorrect type.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_POINTER ,"An invalid pointer was provided. Typically, this means that a NULL pointer was provided for an 'output' parameter.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_CHROMA_TYPE ,"An invalid/unsupported VdpChromaType value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_Y_CB_CR_FORMAT,"An invalid/unsupported VdpYCbCrFormat value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_RGBA_FORMAT,"An invalid/unsupported VdpRGBAFormat value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_INDEXED_FORMAT,"An invalid/unsupported VdpIndexedFormat value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_COLOR_STANDARD,"An invalid/unsupported VdpColorStandard value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_COLOR_TABLE_FORMAT,"An invalid/unsupported VdpColorTableFormat value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_BLEND_FACTOR,"An invalid/unsupported VdpOutputSurfaceRenderBlendFactor value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_BLEND_EQUATION,"An invalid/unsupported VdpOutputSurfaceRenderBlendEquation value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_FLAG,"An invalid/unsupported flag value/combination was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_DECODER_PROFILE,"An invalid/unsupported VdpDecoderProfile value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_VIDEO_MIXER_FEATURE,"An invalid/unsupported VdpVideoMixerFeature value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_VIDEO_MIXER_PARAMETER ,"An invalid/unsupported VdpVideoMixerParameter value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_VIDEO_MIXER_ATTRIBUTE,"An invalid/unsupported VdpVideoMixerAttribute value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_VIDEO_MIXER_PICTURE_STRUCTURE,"An invalid/unsupported VdpVideoMixerPictureStructure value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_FUNC_ID,"An invalid/unsupported VdpFuncId value was supplied.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_SIZE,"The size of a supplied object does not match the object it is being used with.\
-							For example, a VdpVideoMixer is configured to process VdpVideoSurface objects of a specific size.\
-							If presented with a VdpVideoSurface of a different size, this error will be raised.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_VALUE,"An invalid/unsupported value was supplied.\
-							This is a catch-all error code for values of type other than those with a specific error code.");
-		_ERROR_TYPE(VDP_STATUS_INVALID_STRUCT_VERSION,"An invalid/unsupported structure version was specified in a versioned structure. \
-							This implies that the implementation is older than the header file the application was built against.");
-		_ERROR_TYPE(VDP_STATUS_RESOURCES,"The system does not have enough resources to complete the requested operation at this time.");
-		_ERROR_TYPE(VDP_STATUS_HANDLE_DEVICE_MISMATCH,"The set of handles supplied are not all related to the same VdpDevice.When performing operations \
-							that operate on multiple surfaces, such as VdpOutputSurfaceRenderOutputSurface or VdpVideoMixerRender, \
-							all supplied surfaces must have been created within the context of the same VdpDevice object. \
-							This error is raised if they were not.");
-		_ERROR_TYPE(VDP_STATUS_ERROR,"A catch-all error, used when no other error code applies.");
-	}
+   /* Translate a VdpStatus into its description. Multi-line messages use adjacent
+    * string literals so that source indentation never ends up inside the text. */
+   switch (status) {
+   _ERROR_TYPE(VDP_STATUS_OK,"The operation completed successfully; no error.");
+   _ERROR_TYPE(VDP_STATUS_NO_IMPLEMENTATION,"No backend implementation could be loaded.");
+   _ERROR_TYPE(VDP_STATUS_DISPLAY_PREEMPTED,"The display was preempted, or a fatal error occurred. The application must re-initialize VDPAU.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_HANDLE,"An invalid handle value was provided. Either the handle does not exist at all, or refers to an object of an incorrect type.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_POINTER ,"An invalid pointer was provided. Typically, this means that a NULL pointer was provided for an 'output' parameter.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_CHROMA_TYPE ,"An invalid/unsupported VdpChromaType value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_Y_CB_CR_FORMAT,"An invalid/unsupported VdpYCbCrFormat value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_RGBA_FORMAT,"An invalid/unsupported VdpRGBAFormat value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_INDEXED_FORMAT,"An invalid/unsupported VdpIndexedFormat value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_COLOR_STANDARD,"An invalid/unsupported VdpColorStandard value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_COLOR_TABLE_FORMAT,"An invalid/unsupported VdpColorTableFormat value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_BLEND_FACTOR,"An invalid/unsupported VdpOutputSurfaceRenderBlendFactor value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_BLEND_EQUATION,"An invalid/unsupported VdpOutputSurfaceRenderBlendEquation value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_FLAG,"An invalid/unsupported flag value/combination was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_DECODER_PROFILE,"An invalid/unsupported VdpDecoderProfile value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_VIDEO_MIXER_FEATURE,"An invalid/unsupported VdpVideoMixerFeature value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_VIDEO_MIXER_PARAMETER ,"An invalid/unsupported VdpVideoMixerParameter value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_VIDEO_MIXER_ATTRIBUTE,"An invalid/unsupported VdpVideoMixerAttribute value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_VIDEO_MIXER_PICTURE_STRUCTURE,"An invalid/unsupported VdpVideoMixerPictureStructure value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_FUNC_ID,"An invalid/unsupported VdpFuncId value was supplied.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_SIZE,"The size of a supplied object does not match the object it is being used with. "
+      "For example, a VdpVideoMixer is configured to process VdpVideoSurface objects of a specific size. "
+      "If presented with a VdpVideoSurface of a different size, this error will be raised.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_VALUE,"An invalid/unsupported value was supplied. This is a catch-all error code for values of type other than those with a specific error code.");
+   _ERROR_TYPE(VDP_STATUS_INVALID_STRUCT_VERSION,"An invalid/unsupported structure version was specified in a versioned structure. This implies that the implementation is older than the header file the application was built against.");
+   _ERROR_TYPE(VDP_STATUS_RESOURCES,"The system does not have enough resources to complete the requested operation at this time.");
+   _ERROR_TYPE(VDP_STATUS_HANDLE_DEVICE_MISMATCH,"The set of handles supplied are not all related to the same VdpDevice. "
+      "When performing operations that operate on multiple surfaces, such as VdpOutputSurfaceRenderOutputSurface or VdpVideoMixerRender, "
+      "all supplied surfaces must have been created within the context of the same VdpDevice object. This error is raised if they were not.");
+   _ERROR_TYPE(VDP_STATUS_ERROR,"A catch-all error, used when no other error code applies.");
+   default: return "Unknown VdpStatus value."; /* unknown codes no longer fall off the end of the function */
+   }
 }
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index 124125ebaad..808ff9e9ce8 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -24,117 +24,112 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  **************************************************************************/
- 
- #include <vdpau/vdpau.h>
- #include <util/u_memory.h>
-  #include <util/u_debug.h>
-  #include "vdpau_private.h"
-
- 
- VdpStatus 	
- vlVdpVideoMixerCreate (VdpDevice device, 
-						uint32_t feature_count, 
-						VdpVideoMixerFeature const *features, 
-						uint32_t parameter_count, 
-						VdpVideoMixerParameter const *parameters, 
-						void const *const *parameter_values, 
-						VdpVideoMixer *mixer)
+
+#include <vdpau/vdpau.h>
+#include <util/u_memory.h>
+#include <util/u_debug.h>
+#include "vdpau_private.h"
+
+VdpStatus
+vlVdpVideoMixerCreate(VdpDevice device,
+                      uint32_t feature_count,
+                      VdpVideoMixerFeature const *features,
+                      uint32_t parameter_count,
+                      VdpVideoMixerParameter const *parameters,
+                      void const *const *parameter_values,
+                      VdpVideoMixer *mixer)
 {
-	VdpStatus ret;
-	vlVdpVideoMixer *vmixer = NULL;
-	
-	debug_printf("[VDPAU] Creating VideoMixer\n");
-	
-	vlVdpDevice *dev = vlGetDataHTAB(device);
-	if (!dev)
+   VdpStatus ret;
+   vlVdpVideoMixer *vmixer = NULL;
+
+   debug_printf("[VDPAU] Creating VideoMixer\n");
+
+   vlVdpDevice *dev = vlGetDataHTAB(device);
+   if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
-	  
-	vmixer = CALLOC(1, sizeof(vlVdpVideoMixer));
-	if (!vmixer)
+
+   vmixer = CALLOC(1, sizeof(vlVdpVideoMixer));
+   if (!vmixer)
       return VDP_STATUS_RESOURCES;
-	  
-	vmixer->device = dev;
-	  /*
-	   * TODO: Handle features and parameters
-	   * */
-	  
-	*mixer = vlAddDataHTAB(vmixer);
-    if (*mixer == 0) {
+
+   vmixer->device = dev;
+   /* TODO: Handle features and parameters */
+
+   *mixer = vlAddDataHTAB(vmixer);
+   if (*mixer == 0) {
+      /* vlAddDataHTAB reported failure, so release vmixer before bailing out. */
+      FREE(vmixer);
       ret = VDP_STATUS_ERROR;
       goto no_handle;
-    }
-   
-   
+   }
+
    return VDP_STATUS_OK;
-   no_handle:
+no_handle:
    return ret;
 }
 
 VdpStatus
-vlVdpVideoMixerSetFeatureEnables (
-			VdpVideoMixer mixer, 
-			uint32_t feature_count, 
-			VdpVideoMixerFeature const *features, 
-			VdpBool const *feature_enables)
+vlVdpVideoMixerSetFeatureEnables(VdpVideoMixer mixer,
+                                 uint32_t feature_count,
+                                 VdpVideoMixerFeature const *features,
+                                 VdpBool const *feature_enables)
 {
-	debug_printf("[VDPAU] Setting VideoMixer features\n");
-	
-	if (!(features && feature_enables))	
-		return VDP_STATUS_INVALID_POINTER;
-	
-	vlVdpVideoMixer *vmixer = vlGetDataHTAB(mixer);
-	if (!vmixer)
-		return VDP_STATUS_INVALID_HANDLE;
-		
-	/*
-	   * TODO: Set features
-	   * */
-	
-	
-	return VDP_STATUS_OK;
+   debug_printf("[VDPAU] Setting VideoMixer features\n");
+
+   if (!(features && feature_enables))
+      return VDP_STATUS_INVALID_POINTER;
+
+   vlVdpVideoMixer *vmixer = vlGetDataHTAB(mixer);
+   if (!vmixer)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   /*
+    * TODO: Set features
+    * */
+
+   return VDP_STATUS_OK;
 }
 
-VdpStatus vlVdpVideoMixerRender (
-		VdpVideoMixer mixer, 
-		VdpOutputSurface background_surface, 
-		VdpRect const *background_source_rect, 
-		VdpVideoMixerPictureStructure current_picture_structure, 
-		uint32_t video_surface_past_count, 
-		VdpVideoSurface const *video_surface_past, 
-		VdpVideoSurface video_surface_current, 
-		uint32_t video_surface_future_count, 
-		VdpVideoSurface const *video_surface_future, 
-		VdpRect const *video_source_rect, 
-		VdpOutputSurface destination_surface, 
-		VdpRect const *destination_rect, 
-		VdpRect const *destination_video_rect, 
-		uint32_t layer_count, 
-		VdpLayer const *layers)
+VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
+                                VdpOutputSurface background_surface,
+                                VdpRect const *background_source_rect,
+                                VdpVideoMixerPictureStructure current_picture_structure,
+                                uint32_t video_surface_past_count,
+                                VdpVideoSurface const *video_surface_past,
+                                VdpVideoSurface video_surface_current,
+                                uint32_t video_surface_future_count,
+                                VdpVideoSurface const *video_surface_future,
+                                VdpRect const *video_source_rect,
+                                VdpOutputSurface destination_surface,
+                                VdpRect const *destination_rect,
+                                VdpRect const *destination_video_rect,
+                                uint32_t layer_count,
+                                VdpLayer const *layers)
 {
-	if (!(background_source_rect && video_surface_past && video_surface_future && video_source_rect && destination_rect && destination_video_rect && layers))	
-		return VDP_STATUS_INVALID_POINTER;
+   if (!(background_source_rect && video_surface_past && video_surface_future &&
+         video_source_rect && destination_rect && destination_video_rect && layers))
+      return VDP_STATUS_INVALID_POINTER;
 
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpVideoMixerSetAttributeValues (
-		VdpVideoMixer mixer, 
-		uint32_t attribute_count, 
-		VdpVideoMixerAttribute const *attributes, 
-		void const *const *attribute_values)
+vlVdpVideoMixerSetAttributeValues(VdpVideoMixer mixer,
+                                  uint32_t attribute_count,
+                                  VdpVideoMixerAttribute const *attributes,
+                                  void const *const *attribute_values)
 {
-	if (!(attributes && attribute_values))	
-		return VDP_STATUS_INVALID_POINTER;
-	
-	vlVdpVideoMixer *vmixer = vlGetDataHTAB(mixer);
-	if (!vmixer)
-		return VDP_STATUS_INVALID_HANDLE;
-		
-	/*
-	 * TODO: Implement the function
-	 * 
-	 * */
-	
-	return VDP_STATUS_OK;
-}
\ No newline at end of file
+   if (!(attributes && attribute_values))
+      return VDP_STATUS_INVALID_POINTER;
+
+   vlVdpVideoMixer *vmixer = vlGetDataHTAB(mixer);
+   if (!vmixer)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   /*
+    * TODO: Implement the function
+    *
+    * */
+
+   return VDP_STATUS_OK;
+}
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
index 90936584893..7b285079a19 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
@@ -31,119 +31,102 @@
 int
 vlVdpMPEG2NextStartCode(struct vdpMPEG2BitstreamParser *parser)
 {
-	uint32_t integer = 0xffffff00;
-	uint8_t * ptr_read = parser->ptr_bitstream;
-	int8_t * bytes_to_end;
-		
-	bytes_to_end = parser->ptr_bitstream_end - parser->ptr_bitstream;
-		
-	/* Read byte after byte, until startcode is found */
-	while(integer != 0x00000100)
-	{
-		if (bytes_to_end <= 0)
-		{
-			parser->state = MPEG2_BITSTREAM_DONE;
-			parser->code = 0;
-			return 0;
-		}
-		integer = ( integer | *ptr_read++ ) << 8;
-		bytes_to_end--;	
-	}
-	parser->ptr_bitstream = ptr_read;
-	parser->code = parser->ptr_bitstream;
-	/* start_code found. rewind cursor a byte */
-	//parser->cursor -= 8;
-	
-	return 0;
+   /* Sliding window: the last three bytes read live in bits 31..8. */
+   uint32_t window = 0xffffff00;
+   const uint8_t *cursor = parser->ptr_bitstream;
+
+   /* Read byte after byte until the 00 00 01 start code prefix is seen */
+   while (window != 0x00000100) {
+      if (cursor >= parser->ptr_bitstream_end)
+         goto out_of_data;
+      window = (window | *cursor++) << 8;
+   }
+   /* A prefix ending exactly at the buffer end has no code byte to read. */
+   if (cursor >= parser->ptr_bitstream_end)
+      goto out_of_data;
+   /* Park the read pointer on the start code value and report that byte. */
+   parser->ptr_bitstream = cursor;
+   parser->code = *cursor;
+   return 0;
+
+out_of_data:
+   parser->state = MPEG2_BITSTREAM_DONE;
+   parser->code = 0;
+   return 0;
 }
 
 int
-vlVdpMPEG2BitstreamToMacroblock (
-		  struct pipe_screen *screen,
-		  VdpBitstreamBuffer const *bitstream_buffers,
-		  uint32_t bitstream_buffer_count,
-          unsigned int *num_macroblocks,
-          struct pipe_mpeg12_macroblock **pipe_macroblocks)
+vlVdpMPEG2BitstreamToMacroblock(struct pipe_screen *screen,
+                                VdpBitstreamBuffer const *bitstream_buffers,
+                                uint32_t bitstream_buffer_count,
+                                unsigned int *num_macroblocks,
+                                struct pipe_mpeg12_macroblock **pipe_macroblocks)
 {
-	bool b_header_done = false;
-	struct vdpMPEG2BitstreamParser parser;
-	
-	#if(1)
-	FILE *fp;
-   
-      if ((fp = fopen("binout", "w"))==NULL) {
-        printf("Cannot open file.\n");
-        exit(1);
+   bool b_header_done = false; /* NOTE(review): never set to true in the visible code */
+   struct vdpMPEG2BitstreamParser parser;
+
+#if(1) /* debug aid, always compiled in: dumps the first bitstream buffer to ./binout */
+   FILE *fp;
+
+   if ((fp = fopen("binout", "w"))==NULL) {
+      printf("Cannot open file.\n");
+      exit(1); /* terminates the whole process if the dump file cannot be created */
+   }
+   fwrite(bitstream_buffers[0].bitstream, 1, bitstream_buffers[0].bitstream_bytes, fp);
+   fclose(fp);
+
+#endif
+
+   debug_printf("[VDPAU] Starting decoding MPEG2 stream\n");
+
+   num_macroblocks[0] = 0; /* no macroblocks are produced anywhere below */
+
+   memset(&parser,0,sizeof(parser));
+   parser.state = MPEG2_HEADER_START_CODE; /* NOTE(review): the switch below has no case for this state */
+   parser.ptr_bitstream = (unsigned char *)bitstream_buffers[0].bitstream;
+   parser.ptr_bitstream_end = parser.ptr_bitstream + bitstream_buffers[0].bitstream_bytes;
+
+   /* Main header parser loop; NOTE(review): with the initial state unhandled this appears to spin forever */
+   while(!b_header_done) {
+      switch (parser.state) {
+      case MPEG2_SEEK_HEADER:
+         if (vlVdpMPEG2NextStartCode(&parser))
+            exit(1);
+         break; /* NOTE(review): this break makes the start code switch below unreachable */
+         /* Start_code found */
+         switch (parser.code) {
+         /* sequence_header_code */
+         case 0xB3:
+            debug_printf("[VDPAU][Bitstream parser] Sequence header code found\n");
+            /* We don't need to read this, because we already have this information */
+            break;
+         case 0xB5:
+            debug_printf("[VDPAU][Bitstream parser] Extension start code found\n");
+            //exit(1);
+            break;
+         case 0xB8: /* NOTE(review): logs the same message as 0xB5 — verify the intended code name */
+            debug_printf("[VDPAU][Bitstream parser] Extension start code found\n");
+            //exit(1);
+            break;
+         }
+         break;
+      case MPEG2_BITSTREAM_DONE:
+         if (parser.cur_bitstream < bitstream_buffer_count - 1) { /* more buffers left: switch to the next one */
+            debug_printf("[VDPAU][Bitstream parser] Done parsing current bitstream. Moving to the next\n");
+            parser.cur_bitstream++;
+            parser.ptr_bitstream = (unsigned char *)bitstream_buffers[parser.cur_bitstream].bitstream;
+            parser.ptr_bitstream_end = parser.ptr_bitstream + bitstream_buffers[parser.cur_bitstream].bitstream_bytes;
+            parser.state = MPEG2_HEADER_START_CODE;
+         }
+         else {
+            debug_printf("[VDPAU][Bitstream parser] Done with frame\n");
+            exit(0); /* NOTE(review): ends the process instead of returning to the caller */
+            // return 0;
+         }
+         break;
       }
-	fwrite(bitstream_buffers[0].bitstream, 1, bitstream_buffers[0].bitstream_bytes, fp);
-	fclose(fp);
-	
-	#endif
-	
-	
-	debug_printf("[VDPAU] Starting decoding MPEG2 stream\n");
-	
-	num_macroblocks[0] = 0;
-	
-	memset(&parser,0,sizeof(parser));
-	parser.state = MPEG2_HEADER_START_CODE;
-	parser.ptr_bitstream = (unsigned char *)bitstream_buffers[0].bitstream;
-	parser.ptr_bitstream_end = parser.ptr_bitstream + bitstream_buffers[0].bitstream_bytes;
-	
-	/* Main header parser loop */
-	while(!b_header_done)
-	{
-		switch (parser.state)
-		{
-		case MPEG2_SEEK_HEADER:
-			if (vlVdpMPEG2NextStartCode(&parser))
-				exit(1);
-			break;
-			/* Start_code found */
-			switch (parser.code)
-			{
-				/* sequence_header_code */
-				case 0xB3:
-				debug_printf("[VDPAU][Bitstream parser] Sequence header code found\n");
-				
-				/* We dont need to read this, because we already have this information */
-				break;
-				case 0xB5:
-				debug_printf("[VDPAU][Bitstream parser] Extension start code found\n");
-				//exit(1);
-				break;
-				
-				case 0xB8:
-				debug_printf("[VDPAU][Bitstream parser] Extension start code found\n");
-				//exit(1);
-				break;
-				
-			}
-		
-		break;
-		case MPEG2_BITSTREAM_DONE:
-			if (parser.cur_bitstream < bitstream_buffer_count - 1)
-			{
-				debug_printf("[VDPAU][Bitstream parser] Done parsing current bitstream. Moving to the next\n");
-				parser.cur_bitstream++;
-				parser.ptr_bitstream = (unsigned char *)bitstream_buffers[parser.cur_bitstream].bitstream;
-				parser.ptr_bitstream_end = parser.ptr_bitstream + bitstream_buffers[parser.cur_bitstream].bitstream_bytes; 
-				parser.state = MPEG2_HEADER_START_CODE;
-			}
-			else
-			{
-				debug_printf("[VDPAU][Bitstream parser] Done with frame\n");
-				exit(0);
-				// return 0;
-			}
-		break;
-		
-		}
-		
-		
-	}
-	
+   }
 
-	return 0;
+   return 0;
 }
 
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
index 25f3516f821..1fa425fdcdb 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
@@ -34,32 +34,30 @@
 
 enum vdpMPEG2States
 {
-	MPEG2_SEEK_HEADER,
-	MPEG2_HEADER_DONE,
-	MPEG2_BITSTREAM_DONE,
-	MPEG2_HEADER_START_CODE
+   MPEG2_SEEK_HEADER,
+   MPEG2_HEADER_DONE,
+   MPEG2_BITSTREAM_DONE,
+   MPEG2_HEADER_START_CODE
 };
 
-
 struct vdpMPEG2BitstreamParser
 {
-	enum vdpMPEG2States state;
-	uint32_t cur_bitstream;
-	const uint8_t *ptr_bitstream_end;
-	const uint8_t *ptr_bitstream;
-	uint8_t code;
-	
-	/* The decoded bitstream goes here: */
-	/* Sequence_header_info */
-	uint32_t horizontal_size_value;
+   enum vdpMPEG2States state;
+   uint32_t cur_bitstream;
+   const uint8_t *ptr_bitstream_end;
+   const uint8_t *ptr_bitstream;
+   uint8_t code;
+
+   /* The decoded bitstream goes here: */
+   /* Sequence_header_info */
+   uint32_t horizontal_size_value;
 };
 
 int
 vlVdpMPEG2BitstreamToMacroblock(struct pipe_screen *screen,
-                  VdpBitstreamBuffer const *bitstream_buffers,
-				  uint32_t bitstream_buffer_count,
-                  unsigned int *num_macroblocks,
-                  struct pipe_mpeg12_macroblock **pipe_macroblocks);
-				  
+                                VdpBitstreamBuffer const *bitstream_buffers,
+                                uint32_t bitstream_buffer_count,
+                                unsigned int *num_macroblocks,
+                                struct pipe_mpeg12_macroblock **pipe_macroblocks);
 
 #endif // MPEG2_BITSTREAM_PARSER_H
diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index 20097eaf98c..90c66481d0a 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -31,34 +31,34 @@
 #include <util/u_memory.h>
 
 VdpStatus
-vlVdpOutputSurfaceCreate (	VdpDevice  device, 
-							VdpRGBAFormat  rgba_format, 
-							uint32_t width, uint32_t height, 
-							VdpOutputSurface  *surface)
+vlVdpOutputSurfaceCreate(VdpDevice device,
+                         VdpRGBAFormat rgba_format,
+                         uint32_t width, uint32_t height,
+                         VdpOutputSurface  *surface)
 {
-	vlVdpOutputSurface *vlsurface = NULL;
-	
-	debug_printf("[VDPAU] Creating output surface\n");
-	if (!(width && height))
-		return VDP_STATUS_INVALID_SIZE;
-		
-	vlVdpDevice *dev = vlGetDataHTAB(device);
-	if (!dev)
+   vlVdpOutputSurface *vlsurface = NULL;
+
+   debug_printf("[VDPAU] Creating output surface\n");
+   if (!(width && height))
+      return VDP_STATUS_INVALID_SIZE;
+
+   vlVdpDevice *dev = vlGetDataHTAB(device);
+   if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
-	  
-	vlsurface = CALLOC(1, sizeof(vlVdpOutputSurface));
-    if (!vlsurface)
+
+   vlsurface = CALLOC(1, sizeof(vlVdpOutputSurface));
+   if (!vlsurface)
       return VDP_STATUS_RESOURCES;
-	  
-	vlsurface->width = width;
-	vlsurface->height = height;
-	vlsurface->format = FormatRGBAToPipe(rgba_format);
-	  
-	*surface = vlAddDataHTAB(vlsurface);
+
+   vlsurface->width = width;
+   vlsurface->height = height;
+   vlsurface->format = FormatRGBAToPipe(rgba_format);
+
+   *surface = vlAddDataHTAB(vlsurface);
    if (*surface == 0) {
       FREE(dev);
-	  return VDP_STATUS_ERROR;
+      return VDP_STATUS_ERROR;
    }
-	
-	return VDP_STATUS_OK;
-}
\ No newline at end of file
+
+   return VDP_STATUS_OK;
+}
diff --git a/src/gallium/state_trackers/vdpau/preemption.c b/src/gallium/state_trackers/vdpau/preemption.c
index 4572bdcfe6d..fa70bb09cbc 100644
--- a/src/gallium/state_trackers/vdpau/preemption.c
+++ b/src/gallium/state_trackers/vdpau/preemption.c
@@ -24,16 +24,16 @@
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  **************************************************************************/
- 
+
  #include <vdpau/vdpau.h>
- 
- void vlVdpPreemptionCallback (VdpDevice device, void *context)
+
+ void vlVdpPreemptionCallback(VdpDevice device, void *context)
  {
-	 /* TODO: Implement preemption */
+   /* TODO: Implement preemption */
  }
- 
- VdpStatus vlVdpPreemptionCallbackRegister (VdpDevice device, VdpPreemptionCallback callback, void *context)
+
+ VdpStatus vlVdpPreemptionCallbackRegister(VdpDevice device, VdpPreemptionCallback callback,
+                                           void *context)
  {
-	 
-	 return VDP_STATUS_OK;
- }
\ No newline at end of file
+   return VDP_STATUS_OK;
+ }
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 5f545d0bb27..d4f67379ed2 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -31,119 +31,115 @@
 #include <util/u_memory.h>
 
 VdpStatus
-vlVdpPresentationQueueTargetDestroy (VdpPresentationQueueTarget  presentation_queue_target)
+vlVdpPresentationQueueTargetDestroy(VdpPresentationQueueTarget presentation_queue_target)
 {
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpPresentationQueueCreate (	VdpDevice  device, 
-								VdpPresentationQueueTarget  presentation_queue_target, 
-								VdpPresentationQueue  *presentation_queue)
+vlVdpPresentationQueueCreate(VdpDevice device,
+                             VdpPresentationQueueTarget presentation_queue_target,
+                             VdpPresentationQueue *presentation_queue)
 {
-	debug_printf("[VDPAU] Creating PresentationQueue\n");
-	VdpStatus    ret;
-	vlVdpPresentationQueue *pq = NULL;
-	
-	if (!presentation_queue)
-		return VDP_STATUS_INVALID_POINTER;
-	  
+   debug_printf("[VDPAU] Creating PresentationQueue\n");
+   VdpStatus ret;
+   vlVdpPresentationQueue *pq = NULL;
+
+   if (!presentation_queue)
+      return VDP_STATUS_INVALID_POINTER;
+
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
 
    vlVdpPresentationQueueTarget *pqt = vlGetDataHTAB(presentation_queue_target);
    if (!pqt)
-	   return VDP_STATUS_INVALID_HANDLE;
-	   
-	if (dev != pqt->device)
-		return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
+      return VDP_STATUS_INVALID_HANDLE;
+
+   if (dev != pqt->device)
+      return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
 
    pq = CALLOC(1, sizeof(vlVdpPresentationQueue));
    if (!pq)
       return VDP_STATUS_RESOURCES;
-	  
-	*presentation_queue = vlAddDataHTAB(pq);
+
+   *presentation_queue = vlAddDataHTAB(pq);
    if (*presentation_queue == 0) {
       ret = VDP_STATUS_ERROR;
       goto no_handle;
    }
 
-
-	return VDP_STATUS_OK;
-    no_handle:
-    FREE(pq);
-	return ret;
+   return VDP_STATUS_OK;
+no_handle:
+   FREE(pq);
+   return ret;
 }
 
 VdpStatus
-vlVdpPresentationQueueDestroy (VdpPresentationQueue  presentation_queue)
+vlVdpPresentationQueueDestroy(VdpPresentationQueue presentation_queue)
 {
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpPresentationQueueSetBackgroundColor (	VdpPresentationQueue  presentation_queue, 
-											VdpColor  *const background_color)
+vlVdpPresentationQueueSetBackgroundColor(VdpPresentationQueue presentation_queue,
+                                         VdpColor *const background_color)
 {
-	if (!background_color)
-		return VDP_STATUS_INVALID_POINTER;
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   if (!background_color)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpPresentationQueueGetBackgroundColor (	VdpPresentationQueue  presentation_queue, 
-											VdpColor  *const background_color)
+vlVdpPresentationQueueGetBackgroundColor(VdpPresentationQueue presentation_queue,
+                                         VdpColor *const background_color)
 {
-	if (!background_color)
-		return VDP_STATUS_INVALID_POINTER;
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   if (!background_color)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpPresentationQueueGetTime (	VdpPresentationQueue  presentation_queue, 
-								VdpTime  *current_time)
+vlVdpPresentationQueueGetTime(VdpPresentationQueue presentation_queue,
+                              VdpTime *current_time)
 {
-	if (!current_time)
-		return VDP_STATUS_INVALID_POINTER;
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   if (!current_time)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpPresentationQueueDisplay (	VdpPresentationQueue  presentation_queue, 
-								VdpOutputSurface  surface, 
-								uint32_t clip_width, 
-								uint32_t clip_height, 
-								VdpTime  earliest_presentation_time)
+vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
+                              VdpOutputSurface surface,
+                              uint32_t clip_width,
+                              uint32_t clip_height,
+                              VdpTime  earliest_presentation_time)
 {
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpPresentationQueueBlockUntilSurfaceIdle (	VdpPresentationQueue  presentation_queue, 
-												VdpOutputSurface  surface, 
-												VdpTime  *first_presentation_time)
+vlVdpPresentationQueueBlockUntilSurfaceIdle(VdpPresentationQueue presentation_queue,
+                                            VdpOutputSurface surface,
+                                            VdpTime *first_presentation_time)
 {
-	if (!first_presentation_time)
-		return VDP_STATUS_INVALID_POINTER;
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
+   if (!first_presentation_time)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
-vlVdpPresentationQueueQuerySurfaceStatus (	VdpPresentationQueue  presentation_queue, 
-											VdpOutputSurface  surface, 
-											VdpPresentationQueueStatus  *status, 
-											VdpTime  *first_presentation_time)
+vlVdpPresentationQueueQuerySurfaceStatus(VdpPresentationQueue presentation_queue,
+                                         VdpOutputSurface surface,
+                                         VdpPresentationQueueStatus *status,
+                                         VdpTime *first_presentation_time)
 {
-	if (!(status && first_presentation_time))
-		return VDP_STATUS_INVALID_POINTER;
-	
-	return VDP_STATUS_NO_IMPLEMENTATION;
-}
\ No newline at end of file
+   if (!(status && first_presentation_time))
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
index a3a8500a6f7..74a3bd925cf 100644
--- a/src/gallium/state_trackers/vdpau/query.c
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -61,7 +61,7 @@ vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chrom
    struct vl_screen *vlscreen;
    uint32_t max_2d_texture_level;
    VdpStatus ret;
-   
+
    debug_printf("[VDPAU] Querying video surfaces\n");
 
    if (!(is_supported && max_width && max_height))
@@ -70,12 +70,12 @@ vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chrom
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
-   
+
    vlscreen = vl_screen_create(dev->display, dev->screen);
    if (!vlscreen)
       return VDP_STATUS_RESOURCES;
 
-   /* XXX: Current limits */ 
+   /* XXX: Current limits */
    *is_supported = true;
    if (surface_chroma_type != VDP_CHROMA_TYPE_420)  {
 	  *is_supported = false;
@@ -90,9 +90,9 @@ vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chrom
 
    /* I am not quite sure if it is max_2d_texture_level-1 or just max_2d_texture_level */
    *max_width = *max_height = pow(2,max_2d_texture_level-1);
-   
+
    vl_screen_destroy(vlscreen);
-   
+
    return VDP_STATUS_OK;
    no_sup:
    return ret;
@@ -103,10 +103,10 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
                                                   VdpYCbCrFormat bits_ycbcr_format,
                                                   VdpBool *is_supported)
 {
-	struct vl_screen *vlscreen;
-	
-	debug_printf("[VDPAU] Querying get put video surfaces\n");
-	
+   struct vl_screen *vlscreen;
+
+   debug_printf("[VDPAU] Querying get put video surfaces\n");
+
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
@@ -118,16 +118,16 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
    if (!vlscreen)
       return VDP_STATUS_RESOURCES;
 
-   if (bits_ycbcr_format != VDP_YCBCR_FORMAT_Y8U8V8A8 && bits_ycbcr_format != VDP_YCBCR_FORMAT_V8U8Y8A8) 
-	                               *is_supported = vlscreen->pscreen->is_format_supported(vlscreen->pscreen,
-                                   FormatToPipe(bits_ycbcr_format),
-                                   PIPE_TEXTURE_2D,
-								   1,
-                                   PIPE_BIND_RENDER_TARGET, 
-                                   PIPE_TEXTURE_GEOM_NON_SQUARE );
-								   
+   if (bits_ycbcr_format != VDP_YCBCR_FORMAT_Y8U8V8A8 && bits_ycbcr_format != VDP_YCBCR_FORMAT_V8U8Y8A8)
+      *is_supported = vlscreen->pscreen->is_format_supported(vlscreen->pscreen,
+                                                             FormatToPipe(bits_ycbcr_format),
+                                                             PIPE_TEXTURE_2D,
+                                                             1,
+                                                             PIPE_BIND_RENDER_TARGET,
+                                                             PIPE_TEXTURE_GEOM_NON_SQUARE);
+
    vl_screen_destroy(vlscreen);
-								   
+
    return VDP_STATUS_OK;
 }
 
@@ -141,16 +141,16 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
    uint32_t max_decode_height;
    uint32_t max_2d_texture_level;
    struct vl_screen *vlscreen;
-   
+
    debug_printf("[VDPAU] Querying decoder\n");
-	
+
    if (!(is_supported && max_level && max_macroblocks && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
-	  
+
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
-   
+
    vlscreen = vl_screen_create(dev->display, dev->screen);
    if (!vlscreen)
       return VDP_STATUS_RESOURCES;
@@ -160,24 +160,24 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
 	   *is_supported = false;
 	   return VDP_STATUS_OK;
    }
-   
+
    if (p_profile != PIPE_VIDEO_PROFILE_MPEG2_SIMPLE && p_profile != PIPE_VIDEO_PROFILE_MPEG2_MAIN)  {
 	   *is_supported = false;
 	   return VDP_STATUS_OK;
    }
-	   
+
    /* XXX hack, need to implement something more sane when the decoders have been implemented */
    max_2d_texture_level = vlscreen->pscreen->get_param( vlscreen->pscreen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS );
    max_decode_width = max_decode_height = pow(2,max_2d_texture_level-2);
-   if (!(max_decode_width && max_decode_height))  
+   if (!(max_decode_width && max_decode_height))
       return VDP_STATUS_RESOURCES;
-	
+
    *is_supported = true;
    *max_width = max_decode_width;
    *max_height = max_decode_height;
    *max_level = 16;
    *max_macroblocks = (max_decode_width/16) * (max_decode_height/16);
-   
+
    vl_screen_destroy(vlscreen);
 
    return VDP_STATUS_OK;
@@ -186,10 +186,10 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
 VdpStatus
 vlVdpOutputSurfaceQueryCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
                                     VdpBool *is_supported, uint32_t *max_width, uint32_t *max_height)
-{	
+{
    if (!(is_supported && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
-	  
+
    debug_printf("[VDPAU] Querying ouput surfaces\n");
 
    return VDP_STATUS_NO_IMPLEMENTATION;
@@ -200,7 +200,7 @@ vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities(VdpDevice device, VdpRGBAFor
                                                     VdpBool *is_supported)
 {
    debug_printf("[VDPAU] Querying output surfaces get put native cap\n");
-	
+
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 1deea3a67d3..d04c517733d 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -28,7 +28,6 @@
 #ifndef VDPAU_PRIVATE_H
 #define VDPAU_PRIVATE_H
 
-
 #include <vdpau/vdpau.h>
 #include <vdpau/vdpau_x11.h>
 #include <pipe/p_compiler.h>
@@ -182,7 +181,7 @@ typedef struct
 
 typedef struct
 {
-	vlVdpDevice *device;
+   vlVdpDevice *device;
 } vlVdpVideoMixer;
 
 typedef struct
@@ -206,12 +205,12 @@ typedef struct
 
 typedef struct
 {
-	vlVdpDevice *device;
-    struct vl_context *vctx;
-	enum pipe_video_chroma_format chroma_format;
-	enum pipe_video_profile profile;
-	uint32_t width;
-	uint32_t height;
+   vlVdpDevice *device;
+   struct vl_context *vctx;
+   enum pipe_video_chroma_format chroma_format;
+   enum pipe_video_profile profile;
+   uint32_t width;
+   uint32_t height;
 } vlVdpDecoder;
 
 typedef uint32_t vlHandle;
@@ -274,5 +273,4 @@ VdpVideoMixerRender vlVdpVideoMixerRender;
 VdpVideoMixerSetAttributeValues vlVdpVideoMixerSetAttributeValues;
 VdpGenerateCSCMatrix vlVdpGenerateCSCMatrix;
 
-
 #endif // VDPAU_PRIVATE_H
-- 
cgit v1.2.3


From d1655b60b08f09873aa4b627e948dfc517e9e3eb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 18 Mar 2011 19:34:02 +0100
Subject: get softpipe winsys to compile again

---
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index cc80583f088..ab52be38c75 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -30,8 +30,10 @@
 #include <state_tracker/xlib_sw_winsys.h>
 #include <util/u_memory.h>
 #include <util/u_format.h>
+#include <util/u_inlines.h>
 #include <softpipe/sp_public.h>
-#include <softpipe/sp_video_context.h>
+#include <pipe/p_state.h>
+#include <pipe/p_video_context.h>
 
 struct vl_xsp_screen
 {
@@ -44,8 +46,9 @@ struct vl_xsp_screen
 };
 
 struct pipe_surface*
-vl_drawable_surface_get(struct vl_screen *vscreen, Drawable drawable)
+vl_drawable_surface_get(struct vl_context *vctx, Drawable drawable)
 {
+   struct vl_screen *vscreen = vctx->vscreen;
    struct vl_xsp_screen *xsp_screen = (struct vl_xsp_screen*)vscreen;
    Window root;
    int x, y;
@@ -53,7 +56,7 @@ vl_drawable_surface_get(struct vl_screen *vscreen, Drawable drawable)
    unsigned int border_width;
    unsigned int depth;
    struct pipe_resource templat, *drawable_tex;
-   struct pipe_surface *drawable_surface = NULL;
+   struct pipe_surface surf_template, *drawable_surface = NULL;
 
    assert(vscreen);
    assert(drawable != None);
@@ -89,9 +92,9 @@ vl_drawable_surface_get(struct vl_screen *vscreen, Drawable drawable)
    if (!drawable_tex)
       return NULL;
 
-   xsp_screen->drawable_surface = vscreen->pscreen->get_tex_surface(vscreen->pscreen, drawable_tex,
-                                                                    0, 0, 0,
-                                                                    templat.bind);
+   memset(&surf_template, 0, sizeof(surf_template));
+   xsp_screen->drawable_surface = vctx->vpipe->create_surface(vctx->vpipe, drawable_tex,
+                                                              &surf_template);
    pipe_resource_reference(&drawable_tex, NULL);
 
    if (!xsp_screen->drawable_surface)
-- 
cgit v1.2.3


From cbb3ad3d5aaeecfdd724ef0de120969031bb23fc Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 18 Mar 2011 21:12:08 +0100
Subject: vdpau: fix formatting and indentation of surface.c

---
 src/gallium/state_trackers/vdpau/surface.c | 239 ++++++++++++++---------------
 1 file changed, 112 insertions(+), 127 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index 0ebfd12d988..a625ea401ba 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -32,162 +32,147 @@
 #include <util/u_format.h>
 
 VdpStatus
-vlVdpVideoSurfaceCreate(VdpDevice device,
- VdpChromaType chroma_type,
- uint32_t width,
- uint32_t height,
- VdpVideoSurface *surface)
+vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
+                        uint32_t width, uint32_t height,
+                        VdpVideoSurface *surface)
 {
- printf("[VDPAU] Creating a surface\n");
-
- vlVdpSurface *p_surf;
- VdpStatus ret;
-
- if (!(width && height))
- {
- ret = VDP_STATUS_INVALID_SIZE;
- goto inv_size;
- }
-
-
- if (!vlCreateHTAB()) {
- ret = VDP_STATUS_RESOURCES;
- goto no_htab;
- }
-
- p_surf = CALLOC(1, sizeof(p_surf));
- if (!p_surf) {
- ret = VDP_STATUS_RESOURCES;
- goto no_res;
- }
-
- vlVdpDevice *dev = vlGetDataHTAB(device);
- if (!dev) {
- ret = VDP_STATUS_INVALID_HANDLE;
- goto inv_device;
- }
-
- p_surf->chroma_format = TypeToPipe(chroma_type);
- p_surf->device = dev;
- p_surf->width = width;
- p_surf->height = height;
-
- *surface = vlAddDataHTAB(p_surf);
- if (*surface == 0) {
- ret = VDP_STATUS_ERROR;
- goto no_handle;
- }
-
- return VDP_STATUS_OK;
+   printf("[VDPAU] Creating a surface\n");
+
+   vlVdpSurface *p_surf;
+   VdpStatus ret;
+
+   if (!(width && height)) {
+      ret = VDP_STATUS_INVALID_SIZE;
+      goto inv_size;
+   }
+
+   if (!vlCreateHTAB()) {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_htab;
+   }
+
+   p_surf = CALLOC(1, sizeof(p_surf));
+   if (!p_surf) {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_res;
+   }
+
+   vlVdpDevice *dev = vlGetDataHTAB(device);
+   if (!dev) {
+      ret = VDP_STATUS_INVALID_HANDLE;
+      goto inv_device;
+   }
+
+   p_surf->chroma_format = TypeToPipe(chroma_type);
+   p_surf->device = dev;
+   p_surf->width = width;
+   p_surf->height = height;
+
+   *surface = vlAddDataHTAB(p_surf);
+   if (*surface == 0) {
+      ret = VDP_STATUS_ERROR;
+      goto no_handle;
+   }
+
+   return VDP_STATUS_OK;
 
 no_handle:
- FREE(p_surf->psurface);
+   FREE(p_surf->psurface);
 inv_device:
 no_surf:
- FREE(p_surf);
+   FREE(p_surf);
 no_res:
- // vlDestroyHTAB(); XXX: Do not destroy this tab, I think.
+   // vlDestroyHTAB(); XXX: Do not destroy this tab, I think.
 no_htab:
 inv_size:
- return ret;
+   return ret;
 }
 
 VdpStatus
-vlVdpVideoSurfaceDestroy ( VdpVideoSurface surface )
+vlVdpVideoSurfaceDestroy(VdpVideoSurface surface)
 {
- vlVdpSurface *p_surf;
-
- p_surf = (vlVdpSurface *)vlGetDataHTAB((vlHandle)surface);
- if (!p_surf)
- return VDP_STATUS_INVALID_HANDLE;
-
- if (p_surf->psurface) {
- if (p_surf->psurface->texture) {
- if (p_surf->psurface->texture->screen)
- p_surf->psurface->context->surface_destroy(p_surf->psurface->context, p_surf->psurface);
- }
- }
- FREE(p_surf);
- return VDP_STATUS_OK;
+   vlVdpSurface *p_surf;
+
+   p_surf = (vlVdpSurface *)vlGetDataHTAB((vlHandle)surface);
+   if (!p_surf)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   if (p_surf->psurface) {
+      if (p_surf->psurface->texture) {
+         if (p_surf->psurface->texture->screen)
+            p_surf->psurface->context->surface_destroy(p_surf->psurface->context, p_surf->psurface);
+      }
+   }
+   FREE(p_surf);
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
-vlVdpVideoSurfaceGetParameters ( VdpVideoSurface surface,
- VdpChromaType *chroma_type,
- uint32_t *width,
- uint32_t *height
-)
+vlVdpVideoSurfaceGetParameters(VdpVideoSurface surface,
+                               VdpChromaType *chroma_type,
+                               uint32_t *width, uint32_t *height)
 {
- if (!(width && height && chroma_type))
- return VDP_STATUS_INVALID_POINTER;
-
+   if (!(width && height && chroma_type))
+      return VDP_STATUS_INVALID_POINTER;
 
- vlVdpSurface *p_surf = vlGetDataHTAB(surface);
- if (!p_surf)
- return VDP_STATUS_INVALID_HANDLE;
+   vlVdpSurface *p_surf = vlGetDataHTAB(surface);
+   if (!p_surf)
+      return VDP_STATUS_INVALID_HANDLE;
 
+   if (!(p_surf->chroma_format > 0 && p_surf->chroma_format < 3))
+      return VDP_STATUS_INVALID_CHROMA_TYPE;
 
- if (!(p_surf->chroma_format > 0 && p_surf->chroma_format < 3))
- return VDP_STATUS_INVALID_CHROMA_TYPE;
+   *width = p_surf->width;
+   *height = p_surf->height;
+   *chroma_type = PipeToType(p_surf->chroma_format);
 
- *width = p_surf->width;
- *height = p_surf->height;
- *chroma_type = PipeToType(p_surf->chroma_format);
-
- return VDP_STATUS_OK;
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
-vlVdpVideoSurfaceGetBitsYCbCr ( VdpVideoSurface surface,
- VdpYCbCrFormat destination_ycbcr_format,
- void *const *destination_data,
- uint32_t const *destination_pitches
-)
+vlVdpVideoSurfaceGetBitsYCbCr(VdpVideoSurface surface,
+                              VdpYCbCrFormat destination_ycbcr_format,
+                              void *const *destination_data,
+                              uint32_t const *destination_pitches)
 {
- if (!vlCreateHTAB())
- return VDP_STATUS_RESOURCES;
-
-
- vlVdpSurface *p_surf = vlGetDataHTAB(surface);
- if (!p_surf)
- return VDP_STATUS_INVALID_HANDLE;
+   if (!vlCreateHTAB())
+      return VDP_STATUS_RESOURCES;
 
- if (!p_surf->psurface)
- return VDP_STATUS_RESOURCES;
+   vlVdpSurface *p_surf = vlGetDataHTAB(surface);
+   if (!p_surf)
+      return VDP_STATUS_INVALID_HANDLE;
 
+   if (!p_surf->psurface)
+      return VDP_STATUS_RESOURCES;
 
- return VDP_STATUS_OK;
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
-vlVdpVideoSurfacePutBitsYCbCr ( VdpVideoSurface surface,
- VdpYCbCrFormat source_ycbcr_format,
- void const *const *source_data,
- uint32_t const *source_pitches
-)
+vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
+                              VdpYCbCrFormat source_ycbcr_format,
+                              void const *const *source_data,
+                              uint32_t const *source_pitches)
 {
- uint32_t size_surface_bytes;
- const struct util_format_description *format_desc;
- enum pipe_format pformat = FormatToPipe(source_ycbcr_format);
-
- if (!vlCreateHTAB())
- return VDP_STATUS_RESOURCES;
-
-
- vlVdpSurface *p_surf = vlGetDataHTAB(surface);
- if (!p_surf)
- return VDP_STATUS_INVALID_HANDLE;
-
-
- //size_surface_bytes = ( source_pitches[0] * p_surf->height util_format_get_blockheight(pformat) );
- /*util_format_translate(enum pipe_format dst_format,
- void *dst, unsigned dst_stride,
- unsigned dst_x, unsigned dst_y,
- enum pipe_format src_format,
- const void *src, unsigned src_stride,
- unsigned src_x, unsigned src_y,
- unsigned width, unsigned height);*/
-
- return VDP_STATUS_NO_IMPLEMENTATION;
-
+   uint32_t size_surface_bytes;
+   const struct util_format_description *format_desc;
+   enum pipe_format pformat = FormatToPipe(source_ycbcr_format);
+
+   if (!vlCreateHTAB())
+      return VDP_STATUS_RESOURCES;
+
+   vlVdpSurface *p_surf = vlGetDataHTAB(surface);
+   if (!p_surf)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   //size_surface_bytes = ( source_pitches[0] * p_surf->height util_format_get_blockheight(pformat) );
+   /*util_format_translate(enum pipe_format dst_format,
+   void *dst, unsigned dst_stride,
+   unsigned dst_x, unsigned dst_y,
+   enum pipe_format src_format,
+   const void *src, unsigned src_stride,
+   unsigned src_x, unsigned src_y,
+   unsigned width, unsigned height);*/
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
-- 
cgit v1.2.3


From f36846c77ee196881c0da560229279fc7ed88170 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 18 Mar 2011 21:44:47 +0100
Subject: vdpau: add vdpau-r600 target

---
 configure.ac                            |  2 +-
 src/gallium/targets/Makefile.vdpau      |  1 +
 src/gallium/targets/vdpau-r600/Makefile | 20 ++++++++++++++++++++
 src/gallium/targets/vdpau-r600/target.c | 24 ++++++++++++++++++++++++
 4 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/targets/vdpau-r600/Makefile
 create mode 100644 src/gallium/targets/vdpau-r600/target.c

diff --git a/configure.ac b/configure.ac
index d69fb7c4958..d0a67cc379c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1788,7 +1788,7 @@ AC_ARG_ENABLE([gallium-r600],
     [enable_gallium_r600=auto])
 if test "x$enable_gallium_r600" = xyes; then
     GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
-    gallium_check_st "r600/drm" "dri-r600" "xvmc-r600"
+    gallium_check_st "r600/drm" "dri-r600" "xvmc-r600" "vdpau-r600"
 fi
 
 dnl
diff --git a/src/gallium/targets/Makefile.vdpau b/src/gallium/targets/Makefile.vdpau
index 2accbeb702e..6aa01037afa 100644
--- a/src/gallium/targets/Makefile.vdpau
+++ b/src/gallium/targets/Makefile.vdpau
@@ -8,6 +8,7 @@ VDPAU_MINOR = 0
 INCLUDES = -I$(TOP)/src/gallium/include \
 	   -I$(TOP)/src/gallium/drivers \
 	   -I$(TOP)/src/gallium/auxiliary \
+	   -I$(TOP)/src/gallium/winsys \
 	   -I$(TOP)/src/gallium/winsys/g3dvl \
 	   $(DRIVER_INCLUDES)
 DEFINES = -DGALLIUM_TRACE -DVER_MAJOR=$(VDPAU_MAJOR) -DVER_MINOR=$(VDPAU_MINOR) $(DRIVER_DEFINES)
diff --git a/src/gallium/targets/vdpau-r600/Makefile b/src/gallium/targets/vdpau-r600/Makefile
new file mode 100644
index 00000000000..efcaaaa4e18
--- /dev/null
+++ b/src/gallium/targets/vdpau-r600/Makefile
@@ -0,0 +1,20 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+PIPE_DRIVERS = \
+        $(TOP)/src/gallium/drivers/r600/libr600.a \
+	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
+        $(TOP)/src/gallium/winsys/r600/drm/libr600winsys.a \
+        $(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/auxiliary/libgallium.a
+
+C_SOURCES = \
+	target.c \
+	$(COMMON_GALLIUM_SOURCES) \
+	$(DRIVER_SOURCES)
+
+DRIVER_LIBS = $(shell pkg-config libdrm_radeon --libs) -lXfixes
+
+include ../Makefile.vdpau
+
+symlinks:
diff --git a/src/gallium/targets/vdpau-r600/target.c b/src/gallium/targets/vdpau-r600/target.c
new file mode 100644
index 00000000000..8753e2bab17
--- /dev/null
+++ b/src/gallium/targets/vdpau-r600/target.c
@@ -0,0 +1,24 @@
+#include "state_tracker/drm_driver.h"
+#include "target-helpers/inline_debug_helper.h"
+#include "r600/drm/r600_drm_public.h"
+#include "r600/r600_public.h"
+
+static struct pipe_screen *create_screen(int fd)
+{
+   struct radeon *radeon;
+   struct pipe_screen *screen;
+
+   radeon = r600_drm_winsys_create(fd);
+   if (!radeon)
+      return NULL;
+
+   screen = r600_screen_create(radeon);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+DRM_DRIVER_DESCRIPTOR("r600", "radeon", create_screen)
-- 
cgit v1.2.3


From 74e1d64c6da055ad0142c227845087003acfaa51 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 19 Mar 2011 11:40:22 +0100
Subject: r600g: revert some asm optimisations

They didn't have the desired effect and are still quite buggy
---
 src/gallium/drivers/r600/eg_asm.c      |   24 +-
 src/gallium/drivers/r600/r600_asm.c    | 1139 ++++----------------------------
 src/gallium/drivers/r600/r600_asm.h    |    4 +-
 src/gallium/drivers/r600/r600_shader.c |   18 +-
 4 files changed, 157 insertions(+), 1028 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 20a319a255d..3793b919dde 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -32,14 +32,12 @@
 int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 {
 	unsigned id = cf->id;
-	unsigned end_of_program = bc->cf.prev == &cf->list;
 
 	switch (cf->inst) {
 	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
 	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
 	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
 	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
-		assert(!end_of_program);
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
 			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
 			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
@@ -48,16 +46,15 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
-			S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) |
-			S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
+					S_SQ_CF_ALU_WORD1_BARRIER(1) |
+					S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
 		break;
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX:
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
-			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) |
-			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
+					S_SQ_CF_WORD1_BARRIER(1) |
+					S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
 		break;
 	case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
 	case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
@@ -70,9 +67,9 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end_of_program);
+			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
 		break;
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
@@ -85,10 +82,9 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
-			S_SQ_CF_WORD1_COND(cf->cond) |
-			S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
-			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
+					S_SQ_CF_WORD1_BARRIER(1) |
+					S_SQ_CF_WORD1_COND(cf->cond) |
+					S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
 
 		break;
 	default:
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 45e614977e2..240093f9b9d 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -35,9 +35,6 @@
 #define NUM_OF_CYCLES 3
 #define NUM_OF_COMPONENTS 4
 
-#define PREV_ALU(alu) LIST_ENTRY(struct r600_bc_alu, alu->list.prev, list)
-#define NEXT_ALU(alu) LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)
-
 static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu)
 {
 	if(alu->is_op3)
@@ -163,7 +160,6 @@ static struct r600_bc_cf *r600_bc_cf(void)
 	LIST_INITHEAD(&cf->alu);
 	LIST_INITHEAD(&cf->vtx);
 	LIST_INITHEAD(&cf->tex);
-	cf->barrier = 1;
 	return cf;
 }
 
@@ -252,49 +248,13 @@ static int r600_bc_add_cf(struct r600_bc *bc)
 	return 0;
 }
 
-static void r600_bc_remove_cf(struct r600_bc *bc, struct r600_bc_cf *cf)
-{
-	struct r600_bc_cf *other;
-	LIST_FOR_EACH_ENTRY(other, &bc->cf, list) {
-		if (other->id > cf->id)
-			other->id -= 2;
-		if (other->cf_addr > cf->id)
-			other->cf_addr -= 2;
-	}
-	LIST_DEL(&cf->list);
-	free(cf);
-}
-
-static void r600_bc_move_cf(struct r600_bc *bc, struct r600_bc_cf *cf, struct r600_bc_cf *next)
-{
-	struct r600_bc_cf *prev = LIST_ENTRY(struct r600_bc_cf, next->list.prev, list);
-	unsigned old_id = cf->id;
-	unsigned new_id = next->list.prev == &bc->cf ? 0 : prev->id + 2;
-	struct r600_bc_cf *other;
-
-	if (prev == cf || next == cf)
-		return; /* position hasn't changed */
-
-	LIST_DEL(&cf->list);
-	LIST_FOR_EACH_ENTRY(other, &bc->cf, list) {
-		if (other->id > old_id)
-			other->id -= 2;
-		if (other->id >= new_id)
-			other->id += 2;
-		if (other->cf_addr > old_id)
-			other->cf_addr -= 2;
-		if (other->cf_addr > new_id)
-			other->cf_addr += 2;
-	}
-	cf->id = new_id;
-	LIST_ADD(&cf->list, &prev->list);
-}
-
 int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 {
 	int r;
 
-	if (bc->cf_last && bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) &&
+	if (bc->cf_last && (bc->cf_last->inst == output->inst ||
+		(bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) &&
+		output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) &&
 		output->type == bc->cf_last->output.type &&
 		output->elem_size == bc->cf_last->output.elem_size &&
 		output->swizzle_x == bc->cf_last->output.swizzle_x &&
@@ -306,6 +266,8 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 		if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
 			(output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
 
+			bc->cf_last->output.end_of_program |= output->end_of_program;
+			bc->cf_last->output.inst = output->inst;
 			bc->cf_last->output.gpr = output->gpr;
 			bc->cf_last->output.array_base = output->array_base;
 			bc->cf_last->output.burst_count += output->burst_count;
@@ -314,6 +276,8 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 		} else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
 			output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
 
+			bc->cf_last->output.end_of_program |= output->end_of_program;
+			bc->cf_last->output.inst = output->inst;
 			bc->cf_last->output.burst_count += output->burst_count;
 			return 0;
 		}
@@ -322,19 +286,28 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 	r = r600_bc_add_cf(bc);
 	if (r)
 		return r;
-	bc->cf_last->inst = BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
+	bc->cf_last->inst = output->inst;
 	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bc_output));
-	bc->cf_last->output.burst_count = 1;
 	return 0;
 }
 
-/* alu predicate instructions */
-static int is_alu_pred_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+/* alu instructions that can ony exits once per group */
+static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 {
 	switch (bc->chiprev) {
 	case CHIPREV_R600:
 	case CHIPREV_R700:
 		return !alu->is_op3 && (
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
 			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
 			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
 			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
@@ -362,6 +335,16 @@ static int is_alu_pred_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 	case CHIPREV_EVERGREEN:
 	default:
 		return !alu->is_op3 && (
+			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
 			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
 			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
 			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
@@ -389,46 +372,6 @@ static int is_alu_pred_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 	}
 }
 
-/* alu kill instructions */
-static int is_alu_kill_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
-{
-	switch (bc->chiprev) {
-	case CHIPREV_R600:
-	case CHIPREV_R700:
-		return !alu->is_op3 && (
-			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
-			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
-			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
-			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
-			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
-			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
-			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
-			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
-			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
-			alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT);
-	case CHIPREV_EVERGREEN:
-	default:
-		return !alu->is_op3 && (
-			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
-			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
-			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
-			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
-			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
-			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
-			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
-			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
-			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
-			alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT);
-	}
-}
-
-/* alu instructions that can ony exits once per group */
-static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
-{
-	return is_alu_kill_inst(bc, alu) ||
-		is_alu_pred_inst(bc, alu);
-}
-
 static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 {
 	switch (bc->chiprev) {
@@ -1307,16 +1250,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
 	return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
 }
 
-static void r600_bc_remove_alu(struct r600_bc_cf *cf, struct r600_bc_alu *alu)
-{
-	if (alu->last && alu->list.prev != &cf->alu) {
-		PREV_ALU(alu)->last = 1;
-	}
-	LIST_DEL(&alu->list);
-	free(alu);
-	cf->ndw -= 2;
-}
-
 static unsigned r600_bc_num_tex_and_vtx_instructions(const struct r600_bc *bc)
 {
 	switch (bc->chiprev) {
@@ -1528,64 +1461,16 @@ static void r600_bc_cf_vtx_build(uint32_t *bytecode, const struct r600_bc_cf *cf
 			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
 }
 
-enum cf_class
+/* common for r600/r700 - eg in eg_asm.c */
+static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 {
-	CF_CLASS_ALU,
-	CF_CLASS_TEXTURE,
-	CF_CLASS_VERTEX,
-	CF_CLASS_EXPORT,
-	CF_CLASS_OTHER
-};
+	unsigned id = cf->id;
 
-static enum cf_class r600_bc_cf_class(struct r600_bc_cf *cf)
-{
 	switch (cf->inst) {
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
-	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
-		return CF_CLASS_ALU;
-
-	case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
-		return CF_CLASS_TEXTURE;
-
-	case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
-	case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
-		return CF_CLASS_VERTEX;
-
-	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-	case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-	case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-		return CF_CLASS_EXPORT;
-
-	case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
-	case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
-	case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
-	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
-	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
-	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
-	case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
-	case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
-	case V_SQ_CF_WORD1_SQ_CF_INST_NOP:
-		return CF_CLASS_OTHER;
-
-	default:
-		R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
-		return -EINVAL;
-	}
-}
-
-/* common for r600/r700 - eg in eg_asm.c */
-static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
-{
-	unsigned id = cf->id;
-	unsigned end_of_program = bc->cf.prev == &cf->list;
-
-	switch (r600_bc_cf_class(cf)) {
-	case CF_CLASS_ALU:
-		assert(!end_of_program);
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
 			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
 			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
@@ -1595,18 +1480,20 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
-			S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) |
-			S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
-			S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
+					S_SQ_CF_ALU_WORD1_BARRIER(1) |
+					S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
+					S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
 		break;
-	case CF_CLASS_TEXTURE:
-	case CF_CLASS_VERTEX:
+	case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+	case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+	case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
 		if (bc->chiprev == CHIPREV_R700)
 			r700_bc_cf_vtx_build(&bc->bytecode[id], cf);
 		else
 			r600_bc_cf_vtx_build(&bc->bytecode[id], cf);
 		break;
-	case CF_CLASS_EXPORT:
+	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+	case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
 		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
@@ -1616,17 +1503,24 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end_of_program);
+			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
 		break;
-	case CF_CLASS_OTHER:
+	case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+	case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+	case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+	case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+	case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
-			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
-			S_SQ_CF_WORD1_COND(cf->cond) |
-			S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
-			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
+					S_SQ_CF_WORD1_BARRIER(1) |
+			                S_SQ_CF_WORD1_COND(cf->cond) |
+			                S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
 
 		break;
 	default:
@@ -1636,819 +1530,12 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	return 0;
 }
 
-struct gpr_usage_range {
-	int	replacement;
-	int	rel_block;
-	int	start;
-	int	end;
-};
-
-struct gpr_usage {
-	unsigned		channels:4;
-	int			first_write;
-	int			last_write[4];
-	unsigned	        nranges;
-	struct gpr_usage_range  *ranges;
-};
-
-static struct gpr_usage_range* last_gpr_usage_range(struct gpr_usage *usage)
-{
-	if (usage->nranges)
-		return usage->ranges + usage->nranges - 1;
-	else
-		return NULL;
-}
-
-static struct gpr_usage_range* add_gpr_usage_range(struct gpr_usage *usage)
-{
-	struct gpr_usage_range *range;
-
-	usage->nranges++;
-	usage->ranges = realloc(usage->ranges, usage->nranges * sizeof(struct gpr_usage_range));
-	if (!usage->ranges)
-		return NULL;
-
-	range = last_gpr_usage_range(usage);
-	range->replacement = -1; /* no prefered replacement */
-	range->rel_block = -1;
-	range->start = -1;
-	range->end = -1;
-
-	return range;
-}
-
-static void notice_gpr_read(struct gpr_usage *usage, int id, unsigned chan)
-{
-	struct gpr_usage_range* range;
-
-        usage->channels |= 1 << chan;
-        usage->first_write = -1;
-        if (!usage->nranges) {
-        	range = add_gpr_usage_range(usage);
-        } else
-		range = last_gpr_usage_range(usage);
-
-        if (range && range->end < id)
-		range->end = id;
-}
-
-static void notice_gpr_rel_read(struct r600_bc *bc, struct gpr_usage usage[128],
-				int id, unsigned gpr, unsigned chan)
-{
-	unsigned i;
-	for (i = gpr; i < bc->ngpr; ++i)
-		notice_gpr_read(&usage[i], id, chan);
-
-	last_gpr_usage_range(&usage[gpr])->rel_block = bc->ngpr - gpr;
-}
-
-static void notice_gpr_last_write(struct gpr_usage *usage, int id, unsigned chan)
-{
-        usage->last_write[chan] = id;
-}
-
-static void notice_gpr_write(struct gpr_usage *usage, int id, unsigned chan,
-				int predicate, int prefered_replacement)
-{
-	struct gpr_usage_range* last_range = last_gpr_usage_range(usage);
-	int start = usage->first_write != -1 ? usage->first_write : id;
-	usage->channels &= ~(1 << chan);
-	if (usage->channels) {
-		if (usage->first_write == -1)
-			usage->first_write = id;
-	} else if (!last_range || (last_range->start != start && !predicate)) {
-		usage->first_write = start;
-		struct gpr_usage_range* range = add_gpr_usage_range(usage);
-		range->replacement = prefered_replacement;
-                range->start = start;
-        } else if (last_range->start == start && prefered_replacement != -1) {
-        	last_range->replacement = prefered_replacement;
-        }
-        notice_gpr_last_write(usage, id, chan);
-}
-
-static void notice_gpr_rel_last_write(struct gpr_usage usage[128], int id, unsigned chan)
-{
-	unsigned i;
-	for (i = 0; i < 128; ++i)
-		notice_gpr_last_write(&usage[i], id, chan);
-}
-
-static void notice_gpr_rel_write(struct gpr_usage usage[128], int id, unsigned chan)
-{
-	unsigned i;
-	for (i = 0; i < 128; ++i)
-		notice_gpr_write(&usage[i], id, chan, 1, -1);
-}
-
-static void notice_alu_src_gprs(struct r600_bc *bc, struct r600_bc_alu *alu,
-                                struct gpr_usage usage[128], int id)
-{
-	unsigned src, num_src;
-
-	num_src = r600_bc_get_num_operands(bc, alu);
-	for (src = 0; src < num_src; ++src) {
-		// constants doesn't matter
-		if (!is_gpr(alu->src[src].sel))
-			continue;
-
-		if (alu->src[src].rel)
-			notice_gpr_rel_read(bc, usage, id, alu->src[src].sel, alu->src[src].chan);
-		else
-			notice_gpr_read(&usage[alu->src[src].sel], id, alu->src[src].chan);
-	}
-}
-
-static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage usage[128],
-				int id, int predicate)
-{
-	struct r600_bc_alu *alu;
-	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
-		if (alu->dst.write) {
-			if (alu->dst.rel)
-				notice_gpr_rel_write(usage, id, alu->dst.chan);
-			else if (alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV && is_gpr(alu->src[0].sel))
-				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan,
-						predicate, alu->src[0].sel);
-			else
-				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan, predicate, -1);
-		}
-
-		if (alu->last)
-			break;
-	}
-}
-
-static void notice_tex_gprs(struct r600_bc *bc, struct r600_bc_tex *tex,
-				struct gpr_usage usage[128],
-				int id, int predicate)
-{
-	if (tex->src_rel) {
-                if (tex->src_sel_x < 4)
-			notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_x);
-		if (tex->src_sel_y < 4)
-			notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_y);
-		if (tex->src_sel_z < 4)
-			notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_z);
-		if (tex->src_sel_w < 4)
-			notice_gpr_rel_read(bc, usage, id, tex->src_gpr, tex->src_sel_w);
-        } else {
-		if (tex->src_sel_x < 4)
-			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_x);
-		if (tex->src_sel_y < 4)
-			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_y);
-		if (tex->src_sel_z < 4)
-			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_z);
-		if (tex->src_sel_w < 4)
-			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_w);
-	}
-	if (tex->dst_rel) {
-		if (tex->dst_sel_x != 7)
-			notice_gpr_rel_write(usage, id, 0);
-		if (tex->dst_sel_y != 7)
-			notice_gpr_rel_write(usage, id, 1);
-		if (tex->dst_sel_z != 7)
-			notice_gpr_rel_write(usage, id, 2);
-		if (tex->dst_sel_w != 7)
-			notice_gpr_rel_write(usage, id, 3);
-	} else {
-		if (tex->dst_sel_x != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 0, predicate, -1);
-		if (tex->dst_sel_y != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 1, predicate, -1);
-		if (tex->dst_sel_z != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 2, predicate, -1);
-		if (tex->dst_sel_w != 7)
-			notice_gpr_write(&usage[tex->dst_gpr], id, 3, predicate, -1);
-	}
-}
-
-static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
-				int id, int predicate)
-{
-	notice_gpr_read(&usage[vtx->src_gpr], id, vtx->src_sel_x);
-
-	if (vtx->dst_sel_x != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 0, predicate, -1);
-	if (vtx->dst_sel_y != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 1, predicate, -1);
-	if (vtx->dst_sel_z != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 2, predicate, -1);
-	if (vtx->dst_sel_w != 7)
-		notice_gpr_write(&usage[vtx->dst_gpr], id, 3, predicate, -1);
-}
-
-static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
-				struct r600_bc_cf *export_cf[128], int export_remap[128])
-{
-	//TODO handle other memory operations
-	struct gpr_usage *output = &usage[cf->output.gpr];
-	int id = MAX4(output->last_write[0], output->last_write[1],
-		output->last_write[2], output->last_write[3]);
-	id += 0x100;
-	id &= ~0xFF;
-
-	export_cf[cf->output.gpr] = cf;
-	export_remap[cf->output.gpr] = id;
-	if (cf->output.swizzle_x < 4)
-		notice_gpr_read(output, id, cf->output.swizzle_x);
-	if (cf->output.swizzle_y < 4)
-		notice_gpr_read(output, id, cf->output.swizzle_y);
-	if (cf->output.swizzle_z < 4)
-		notice_gpr_read(output, id, cf->output.swizzle_z);
-	if (cf->output.swizzle_w < 4)
-		notice_gpr_read(output, id, cf->output.swizzle_w);
-}
-
-static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int id)
-{
-	unsigned i;
-	for (i = 0; i < usage->nranges; ++i) {
-		struct gpr_usage_range* range = &usage->ranges[i];
-
-		if (range->start < id && id <= range->end)
-			return range;
-	}
-	return NULL;
-}
-
-static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int id)
-{
-	unsigned i;
-	for (i = 0; i < usage->nranges; ++i) {
-		struct gpr_usage_range* range = &usage->ranges[i];
-		int end = range->end;
-
-		if (range->start <= id && (id < end || end == -1))
-			return range;
-	}
-	return NULL;
-}
-
-static int is_barrier_needed(struct gpr_usage *usage, int id, unsigned chan, int last_barrier)
-{
-	if (usage->last_write[chan] != (id & ~0xFF))
-		return usage->last_write[chan] >= last_barrier;
-	else
-		return 0;
-}
-
-static int is_intersection(struct gpr_usage_range* a, struct gpr_usage_range* b)
-{
-	return a->start <= b->end && b->start < a->end;
-}
-
-static int rate_replacement(struct gpr_usage usage[128], unsigned current, unsigned gpr,
-				struct gpr_usage_range* range)
-{
-	int max_gpr = gpr + MAX2(range->rel_block, 1);
-	int best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF;
-	unsigned i;
-
-	for (; gpr < max_gpr; ++gpr) {
-
-		if (gpr >= 128) /* relative gpr block won't fit into clause temporaries */
-			return -1; /* forget it */
-
-		if (gpr == current) /* ignore ranges of to be replaced register */
-			continue;
-
-		for (i = 0; i < usage[gpr].nranges; ++i) {
-			if (usage[gpr].ranges[i].replacement < gpr)
-				continue; /* ignore already remapped ranges */
-
-			if (is_intersection(&usage[gpr].ranges[i], range))
-				return -1; /* forget it if usages overlap */
-
-			if (range->start >= usage[gpr].ranges[i].end)
-				best_start = MIN2(best_start, range->start - usage[gpr].ranges[i].end);
-
-			if (range->end != -1 && range->end <= usage[gpr].ranges[i].start)
-				best_end = MIN2(best_end, usage[gpr].ranges[i].start - range->end);
-		}
-	}
-	return best_start + best_end;
-}
-
-static void find_replacement(struct gpr_usage usage[128], unsigned current,
-				struct gpr_usage_range *range)
-{
-	unsigned i, j;
-	int best_gpr = -1, best_rate = 0x7FFFFFFF;
-
-	if (range->replacement == current)
-		return; /* register prefers to be not remapped */
-
-	if (range->replacement != -1 && range->replacement <= current) {
-		struct gpr_usage_range *other = find_src_range(&usage[range->replacement], range->start);
-		if (other && other->replacement != -1)
-			range->replacement = other->replacement;
-	}
-
-	if (range->replacement != -1 && range->replacement < current) {
-		int rate = rate_replacement(usage, current, range->replacement, range);
-
-		/* check if prefered replacement can be used */
-		if (rate != -1) {
-			best_rate = rate;
-			best_gpr = range->replacement;
-		}
-	}
-
-	if (best_gpr == -1 && (range->start & ~0xFF) == (range->end & ~0xFF)) {
-		/* register is just used inside one ALU clause */
-		/* try to use clause temporaries for it */
-		for (i = 127; i > 123; --i) {
-			int rate = rate_replacement(usage, current, i, range);
-
-			if (rate == -1) /* can't be used because ranges overlap */
-				continue;
-
-			if (rate < best_rate) {
-				best_rate = rate;
-				best_gpr = i;
-
-				/* can't get better than this */
-				if (rate == 0)
-					break;
-			}
-		}
-	}
-
-	if (best_gpr == -1) {
-		for (i = 0; i < current; ++i) {
-			int rate = rate_replacement(usage, current, i, range);
-
-			if (rate == -1) /* can't be used because ranges overlap */
-				continue;
-
-			if (rate < best_rate) {
-				best_rate = rate;
-				best_gpr = i;
-
-				/* can't get better than this */
-				if (rate == 0)
-					break;
-			}
-		}
-	}
-
-	if (best_gpr != -1) {
-		struct gpr_usage_range *reservation = add_gpr_usage_range(&usage[best_gpr]);
-		reservation->replacement = best_gpr;
-		reservation->rel_block = -1;
-		reservation->start = range->start;
-		reservation->end = range->end;
-	} else
-		best_gpr = current;
-
-	range->replacement = best_gpr;
-	if (range->rel_block == -1)
-		return; /* no relative block to handle we are done here */
-
-	/* set prefered register for the whole relative register block */
-	for (i = current + 1, ++best_gpr; i < current + range->rel_block; ++i, ++best_gpr) {
-		for (j = 0; j < usage[i].nranges; ++j) {
-			if (is_intersection(&usage[i].ranges[j], range))
-				usage[i].ranges[j].replacement = best_gpr;
-		}
-	}
-}
-
-static void replace_alu_gprs(struct r600_bc *bc, struct r600_bc_alu *alu, struct gpr_usage usage[128],
-				int id, int last_barrier, unsigned *barrier)
-{
-	struct gpr_usage *cur_usage;
-	struct gpr_usage_range *range;
-	unsigned src, num_src;
-
-	num_src = r600_bc_get_num_operands(bc, alu);
-	for (src = 0; src < num_src; ++src) {
-		// constants doesn't matter
-		if (!is_gpr(alu->src[src].sel))
-			continue;
-
-		cur_usage = &usage[alu->src[src].sel];
-		range = find_src_range(cur_usage, id);
-		alu->src[src].sel = range->replacement;
-
-		*barrier |= is_barrier_needed(cur_usage, id, alu->src[src].chan, last_barrier);
-	}
-
-	if (alu->dst.write) {
-		cur_usage = &usage[alu->dst.sel];
-		range = find_dst_range(cur_usage, id);
-		if (!range || range->replacement == -1) {
-			if (!alu->is_op3)
-				alu->dst.write = 0;
-			else
-				/*TODO: really check that register 123 is useable */
-				alu->dst.sel = 123;
-		} else {
-			alu->dst.sel = range->replacement;
-			*barrier |= is_barrier_needed(cur_usage, id, alu->dst.chan, last_barrier);
-		}
-	}
-	if (alu->dst.write) {
-		if (alu->dst.rel)
-			notice_gpr_rel_last_write(usage, id, alu->dst.chan);
-		else
-			notice_gpr_last_write(cur_usage, id, alu->dst.chan);
-	}
-}
-
-static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128],
-				int id, int last_barrier, unsigned *barrier)
-{
-	struct gpr_usage *cur_usage = &usage[tex->src_gpr];
-	struct gpr_usage_range *range = find_src_range(cur_usage, id);
-
-	if (tex->src_rel) {
-		*barrier = 1;
-        } else {
-		if (tex->src_sel_x < 4)
-			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_x, last_barrier);
-		if (tex->src_sel_y < 4)
-			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_y, last_barrier);
-		if (tex->src_sel_z < 4)
-			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_z, last_barrier);
-		if (tex->src_sel_w < 4)
-			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_w, last_barrier);
-	}
-	tex->src_gpr = range->replacement;
-
-	cur_usage = &usage[tex->dst_gpr];
-
-	range = find_dst_range(cur_usage, id);
-	if (range) {
-		tex->dst_gpr = range->replacement;
-
-		if (tex->dst_rel) {
-			if (tex->dst_sel_x != 7)
-				notice_gpr_rel_last_write(usage, id, tex->dst_sel_x);
-			if (tex->dst_sel_y != 7)
-				notice_gpr_rel_last_write(usage, id, tex->dst_sel_y);
-			if (tex->dst_sel_z != 7)
-				notice_gpr_rel_last_write(usage, id, tex->dst_sel_z);
-			if (tex->dst_sel_w != 7)
-				notice_gpr_rel_last_write(usage, id, tex->dst_sel_w);
-		} else {
-			if (tex->dst_sel_x != 7)
-				notice_gpr_last_write(cur_usage, id, tex->dst_sel_x);
-			if (tex->dst_sel_y != 7)
-				notice_gpr_last_write(cur_usage, id, tex->dst_sel_y);
-			if (tex->dst_sel_z != 7)
-				notice_gpr_last_write(cur_usage, id, tex->dst_sel_z);
-			if (tex->dst_sel_w != 7)
-				notice_gpr_last_write(cur_usage, id, tex->dst_sel_w);
-		}
-	} else {
-		tex->dst_gpr = 123;
-	}
-}
-
-static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
-				int id, int last_barrier, unsigned *barrier)
-{
-	struct gpr_usage *cur_usage = &usage[vtx->src_gpr];
-	struct gpr_usage_range *range = find_src_range(cur_usage, id);
-
-	*barrier |= is_barrier_needed(cur_usage, id, vtx->src_sel_x, last_barrier);
-
-	vtx->src_gpr = range->replacement;
-
-	cur_usage = &usage[vtx->dst_gpr];
-	range = find_dst_range(cur_usage, id);
-	if (range) {
-		vtx->dst_gpr = range->replacement;
-
-		if (vtx->dst_sel_x != 7)
-			notice_gpr_last_write(cur_usage, id, vtx->dst_sel_x);
-		if (vtx->dst_sel_y != 7)
-			notice_gpr_last_write(cur_usage, id, vtx->dst_sel_y);
-		if (vtx->dst_sel_z != 7)
-			notice_gpr_last_write(cur_usage, id, vtx->dst_sel_z);
-		if (vtx->dst_sel_w != 7)
-			notice_gpr_last_write(cur_usage, id, vtx->dst_sel_w);
-	} else {
-		vtx->dst_gpr = 123;
-	}
-}
-
-static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
-				int id, int last_barrier)
-{
-	//TODO handle other memory operations
-	struct gpr_usage *cur_usage = &usage[cf->output.gpr];
-	struct gpr_usage_range *range = find_src_range(cur_usage, id);
-
-	cf->barrier = 0;
-	if (cf->output.swizzle_x < 4)
-		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_x, last_barrier);
-	if (cf->output.swizzle_y < 4)
-		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_y, last_barrier);
-	if (cf->output.swizzle_z < 4)
-		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_z, last_barrier);
-	if (cf->output.swizzle_w < 4)
-		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_w, last_barrier);
-
-	cf->output.gpr = range->replacement;
-}
-
-static void optimize_alu_inst(struct r600_bc *bc, struct r600_bc_cf *cf, struct r600_bc_alu *alu)
-{
-	struct r600_bc_alu *alu_next;
-	unsigned chan;
-	unsigned src, num_src;
-
-	/* check if a MOV could be optimized away */
-	if (alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV) {
-
-		/* destination equals source? */
-		if (alu->dst.sel != alu->src[0].sel ||
-			alu->dst.chan != alu->src[0].chan)
-			return;
-
-		/* any special handling for the source? */
-		if (alu->src[0].rel || alu->src[0].neg || alu->src[0].abs)
-			return;
-
-		/* any special handling for destination? */
-		if (alu->dst.rel || alu->dst.clamp)
-			return;
-
-		/* ok find next instruction group and check if ps/pv is used */
-		for (alu_next = alu; !alu_next->last; alu_next = NEXT_ALU(alu_next));
-
-		if (alu_next->list.next != &cf->alu) {
-			chan = is_alu_reduction_inst(bc, alu) ? 0 : alu->dst.chan;
-			for (alu_next = NEXT_ALU(alu_next); alu_next; alu_next = NEXT_ALU(alu_next)) {
-				num_src = r600_bc_get_num_operands(bc, alu_next);
-				for (src = 0; src < num_src; ++src) {
-					if (alu_next->src[src].sel == V_SQ_ALU_SRC_PV &&
-						alu_next->src[src].chan == chan)
-						return;
-
-					if (alu_next->src[src].sel == V_SQ_ALU_SRC_PS)
-						return;
-				}
-
-				if (alu_next->last)
-					break;
-			}
-		}
-
-		r600_bc_remove_alu(cf, alu);
-	}
-}
-
-static void optimize_export_inst(struct r600_bc *bc, struct r600_bc_cf *cf)
-{
-	struct r600_bc_cf *prev = LIST_ENTRY(struct r600_bc_cf, cf->list.prev, list);
-	if (&prev->list == &bc->cf ||
-		prev->inst != cf->inst ||
-		prev->output.type != cf->output.type ||
-		prev->output.elem_size != cf->output.elem_size ||
-		prev->output.swizzle_x != cf->output.swizzle_x ||
-		prev->output.swizzle_y != cf->output.swizzle_y ||
-		prev->output.swizzle_z != cf->output.swizzle_z ||
-		prev->output.swizzle_w != cf->output.swizzle_w)
-		return;
-
-	if ((prev->output.burst_count + cf->output.burst_count) > 16)
-		return;
-
-	if ((prev->output.gpr + prev->output.burst_count) == cf->output.gpr &&
-		(prev->output.array_base + prev->output.burst_count) == cf->output.array_base) {
-
-		prev->output.burst_count += cf->output.burst_count;
-		r600_bc_remove_cf(bc, cf);
-
-	} else if (prev->output.gpr == (cf->output.gpr + cf->output.burst_count) &&
-		prev->output.array_base == (cf->output.array_base + cf->output.burst_count)) {
-
-		cf->output.burst_count += prev->output.burst_count;
-		r600_bc_remove_cf(bc, prev);
-	}
-}
-
-static void r600_bc_optimize(struct r600_bc *bc)
-{
-	struct r600_bc_cf *cf, *next_cf;
-	struct r600_bc_alu *first, *next_alu;
-	struct r600_bc_alu *alu;
-	struct r600_bc_vtx *vtx;
-	struct r600_bc_tex *tex;
-	struct gpr_usage usage[128];
-
-	/* assume that each gpr is exported only once */
-	struct r600_bc_cf *export_cf[128] = { NULL };
-	int export_remap[128];
-
-	int id, cond_start, barrier[bc->nstack];
-	unsigned i, j, stack, predicate, old_stack;
-
-	memset(&usage, 0, sizeof(usage));
-	for (i = 0; i < 128; ++i) {
-		usage[i].first_write = -1;
-		usage[i].last_write[0] = -1;
-		usage[i].last_write[1] = -1;
-		usage[i].last_write[2] = -1;
-		usage[i].last_write[3] = -1;
-	}
-
-	/* first gather some informations about the gpr usage */
-	id = 0; stack = 0;
-	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
-		old_stack = stack;
-		if (stack == 0)
-			cond_start = stack;
-
-		switch (r600_bc_cf_class(cf)) {
-		case CF_CLASS_ALU:
-			predicate = 0;
-			first = NULL;
-			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
-				if (!first)
-					first = alu;
-				notice_alu_src_gprs(bc, alu, usage, id);
-				if (alu->last) {
-					notice_alu_dst_gprs(first, usage, id, predicate || stack > 0);
-					first = NULL;
-					++id;
-				}
-				if (is_alu_pred_inst(bc, alu))
-					predicate++;
-			}
-			if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3)
-				stack += predicate;
-			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)
-				stack -= 1;
-			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)
-				stack -= 2;
-			break;
-		case CF_CLASS_TEXTURE:
-			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
-				notice_tex_gprs(bc, tex, usage, id++, stack > 0);
-			}
-			break;
-		case CF_CLASS_VERTEX:
-			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
-				notice_vtx_gprs(vtx, usage, id++, stack > 0);
-			}
-			break;
-		case CF_CLASS_EXPORT:
-			notice_export_gprs(cf, usage, export_cf, export_remap);
-			continue; // don't increment id
-		case CF_CLASS_OTHER:
-			switch (cf->inst) {
-			case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
-			case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
-			case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
-				break;
-
-			case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-				stack -= cf->pop_count;
-				break;
-
-			default:
-				// TODO implement loop handling
-				goto out;
-			}
-		}
-
-		/* extend last_write after conditional block */
-		if (stack == 0 && old_stack != 0)
-			for (i = 0; i < 128; ++i)
-				for (j = 0; j < 4; ++j)
-					if (usage[i].last_write[j] >= cond_start)
-						usage[i].last_write[j] = id;
-
-		id += 0x100;
-	        id &= ~0xFF;
-	}
-	assert(stack == 0);
-
-	/* try to optimize gpr usage */
-	for (i = 0; i < 124; ++i) {
-		for (j = 0; j < usage[i].nranges; ++j) {
-			struct gpr_usage_range *range = &usage[i].ranges[j];
-			if (range->start == -1)
-				/* can't rearange shader inputs */
-				range->replacement = i;
-			else if (range->end == -1)
-				/* gpr isn't used any more after this instruction */
-				range->replacement = -1;
-			else
-				find_replacement(usage, i, range);
-
-			if (range->replacement == i)
-				bc->ngpr = i;
-			else if (range->replacement < i && range->replacement > bc->ngpr)
-				bc->ngpr = range->replacement;
-		}
-	}
-	bc->ngpr++;
-
-	/* apply the changes */
-	for (i = 0; i < 128; ++i) {
-		usage[i].last_write[0] = -1;
-		usage[i].last_write[1] = -1;
-		usage[i].last_write[2] = -1;
-		usage[i].last_write[3] = -1;
-	}
-	barrier[0] = 0;
-	id = 0; stack = 0;
-	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
-		old_stack = stack;
-		switch (r600_bc_cf_class(cf)) {
-		case CF_CLASS_ALU:
-			predicate = 0;
-			first = NULL;
-			cf->barrier = 0;
-			LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
-				replace_alu_gprs(bc, alu, usage, id, barrier[stack], &cf->barrier);
-				if (alu->last)
-					++id;
-
-				if (is_alu_pred_inst(bc, alu))
-					predicate++;
-
-				if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)
-					optimize_alu_inst(bc, cf, alu);
-			}
-			if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3)
-				stack += predicate;
-			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)
-				stack -= 1;
-			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)
-				stack -= 2;
-			if (LIST_IS_EMPTY(&cf->alu)) {
-				r600_bc_remove_cf(bc, cf);
-				cf = NULL;
-			}
-			break;
-		case CF_CLASS_TEXTURE:
-			cf->barrier = 0;
-			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
-				replace_tex_gprs(tex, usage, id++, barrier[stack], &cf->barrier);
-			}
-			break;
-		case CF_CLASS_VERTEX:
-			cf->barrier = 0;
-			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
-				replace_vtx_gprs(vtx, usage, id++, barrier[stack], &cf->barrier);
-			}
-			break;
-		case CF_CLASS_EXPORT:
-			continue; // don't increment id
-		case CF_CLASS_OTHER:
-			if (cf->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
-				cf->barrier = 0;
-				stack -= cf->pop_count;
-			}
-			break;
-		}
-
-		id &= ~0xFF;
-		if (cf && cf->barrier)
-			barrier[old_stack] = id;
-
-		for (i = old_stack + 1; i <= stack; ++i)
-			barrier[i] = barrier[old_stack];
-
-		id += 0x100;
-		if (stack != 0) /* ensure exports are placed outside of conditional blocks */
-			continue;
-
-		for (i = 0; i < 128; ++i) {
-			if (!export_cf[i] || id < export_remap[i])
-				continue;
-
-			r600_bc_move_cf(bc, export_cf[i], next_cf);
-			replace_export_gprs(export_cf[i], usage, export_remap[i], barrier[stack]);
-			if (export_cf[i]->barrier)
-				barrier[stack] = id - 1;
-			next_cf = LIST_ENTRY(struct r600_bc_cf, export_cf[i]->list.next, list);
-			optimize_export_inst(bc, export_cf[i]);
-			export_cf[i] = NULL;
-		}
-	}
-	assert(stack == 0);
-
-out:
-	for (i = 0; i < 128; ++i) {
-		free(usage[i].ranges);
-	}
-}
-
 int r600_bc_build(struct r600_bc *bc)
 {
 	struct r600_bc_cf *cf;
 	struct r600_bc_alu *alu;
 	struct r600_bc_vtx *vtx;
 	struct r600_bc_tex *tex;
-	struct r600_bc_cf *exports[4] = { NULL };
 	uint32_t literal[4];
 	unsigned nliteral;
 	unsigned addr;
@@ -2460,26 +1547,37 @@ int r600_bc_build(struct r600_bc *bc)
 		bc->nstack = 1;
 	}
 
-	//r600_bc_optimize(bc);
-
 	/* first path compute addr of each CF block */
 	/* addr start after all the CF instructions */
-	addr = LIST_ENTRY(struct r600_bc_cf, bc->cf.prev, list)->id + 2;
+	addr = bc->cf_last->id + 2;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
-		switch (r600_bc_cf_class(cf)) {
-		case CF_CLASS_ALU:
+		switch (cf->inst) {
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
 			break;
-		case CF_CLASS_TEXTURE:
-		case CF_CLASS_VERTEX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
 			/* fetch node need to be 16 bytes aligned*/
 			addr += 3;
 			addr &= 0xFFFFFFFCUL;
 			break;
-		case CF_CLASS_EXPORT:
-			if (cf->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT))
-				exports[cf->output.type] = cf;
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
 			break;
-		case CF_CLASS_OTHER:
+		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+		case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+		case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
 			break;
 		default:
 			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -2489,14 +1587,6 @@ int r600_bc_build(struct r600_bc *bc)
 		addr += cf->ndw;
 		bc->ndw = cf->addr + cf->ndw;
 	}
-
-	/* set export done on last export of each type */
-	for (i = 0; i < 4; ++i) {
-		if (exports[i]) {
-			exports[i]->inst = BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
-		}
-	}
-
 	free(bc->bytecode);
 	bc->bytecode = calloc(1, bc->ndw * 4);
 	if (bc->bytecode == NULL)
@@ -2509,8 +1599,11 @@ int r600_bc_build(struct r600_bc *bc)
 			r = r600_bc_cf_build(bc, cf);
 		if (r)
 			return r;
-		switch (r600_bc_cf_class(cf)) {
-		case CF_CLASS_ALU:
+		switch (cf->inst) {
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
 			nliteral = 0;
 			memset(literal, 0, sizeof(literal));
 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
@@ -2542,7 +1635,8 @@ int r600_bc_build(struct r600_bc *bc)
 				}
 			}
 			break;
-		case CF_CLASS_VERTEX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
 			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
 				r = r600_bc_vtx_build(bc, vtx, addr);
 				if (r)
@@ -2550,7 +1644,7 @@ int r600_bc_build(struct r600_bc *bc)
 				addr += 4;
 			}
 			break;
-		case CF_CLASS_TEXTURE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
 			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
 				r = r600_bc_tex_build(bc, tex, addr);
 				if (r)
@@ -2558,8 +1652,19 @@ int r600_bc_build(struct r600_bc *bc)
 				addr += 4;
 			}
 			break;
-		case CF_CLASS_EXPORT:
-		case CF_CLASS_OTHER:
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+		case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+		case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
 			break;
 		default:
 			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -2635,10 +1740,13 @@ void r600_bc_dump(struct r600_bc *bc)
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		id = cf->id;
 
-		switch (r600_bc_cf_class(cf)) {
-		case CF_CLASS_ALU:
+		switch (cf->inst) {
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
 			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
-			fprintf(stderr, "ADDR:%04d ", cf->addr);
+			fprintf(stderr, "ADDR:%d ", cf->addr);
 			fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode);
 			fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank);
 			fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank);
@@ -2648,22 +1756,22 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode);
 			fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr);
 			fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr);
-			fprintf(stderr, "BARRIER:%d ", cf->barrier);
 			fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
 			break;
-		case CF_CLASS_TEXTURE:
-		case CF_CLASS_VERTEX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
 			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
-			fprintf(stderr, "ADDR:%04d\n", cf->addr);
+			fprintf(stderr, "ADDR:%d\n", cf->addr);
 			id++;
 			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
 			fprintf(stderr, "INST:%d ", cf->inst);
-			fprintf(stderr, "BARRIER:%d ", cf->barrier);
 			fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
 			break;
-		case CF_CLASS_EXPORT:
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
 			fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
-			fprintf(stderr, "GPR:%d ", cf->output.gpr);
+			fprintf(stderr, "GPR:%X ", cf->output.gpr);
 			fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
 			fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
 			fprintf(stderr, "TYPE:%X\n", cf->output.type);
@@ -2673,18 +1781,26 @@ void r600_bc_dump(struct r600_bc *bc)
 			fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
 			fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
 			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
-			fprintf(stderr, "BARRIER:%d ", cf->barrier);
-			fprintf(stderr, "INST:%d ", cf->inst);
-			fprintf(stderr, "BURST_COUNT:%d\n", cf->output.burst_count);
+			fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
+			fprintf(stderr, "INST:%d ", cf->output.inst);
+			fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
+			fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
 			break;
-		case CF_CLASS_OTHER:
+		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+		case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+		case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
 			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
-			fprintf(stderr, "ADDR:%04d\n", cf->cf_addr);
+			fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
 			id++;
 			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
 			fprintf(stderr, "INST:%d ", cf->inst);
 			fprintf(stderr, "COND:%X ", cf->cond);
-			fprintf(stderr, "BARRIER:%d ", cf->barrier);
 			fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
 			break;
 		}
@@ -3025,7 +2141,6 @@ int r600_vertex_elements_build_fetch_shader(struct r600_pipe_context *rctx, stru
 	}
 
 	r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
-	r600_bc_add_cfinst(&bc, BC_INST(&bc, V_SQ_CF_WORD1_SQ_CF_INST_NOP));
 
 	if ((r = r600_bc_build(&bc))) {
 		r600_bc_clear(&bc);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index db8d0a4ed10..27ea293ebe5 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -109,6 +109,8 @@ struct r600_bc_vtx {
 struct r600_bc_output {
 	unsigned			array_base;
 	unsigned			type;
+	unsigned			end_of_program;
+	unsigned			inst;
 	unsigned			elem_size;
 	unsigned			gpr;
 	unsigned			swizzle_x;
@@ -116,6 +118,7 @@ struct r600_bc_output {
 	unsigned			swizzle_z;
 	unsigned			swizzle_w;
 	unsigned			burst_count;
+	unsigned			barrier;
 };
 
 struct r600_bc_kcache {
@@ -133,7 +136,6 @@ struct r600_bc_cf {
 	unsigned			cond;
 	unsigned			pop_count;
 	unsigned			cf_addr; /* control flow addr */
-	unsigned			barrier;
 	struct r600_bc_kcache		kcache[2];
 	unsigned			r6xx_uses_waterfall;
 	struct list_head		alu;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 12786daa464..e7285d624e3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -567,7 +567,7 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
 	struct tgsi_full_property *property;
 	struct r600_shader_ctx ctx;
 	struct r600_bc_output output[32];
-	unsigned noutput;
+	unsigned output_done, noutput;
 	unsigned opcode;
 	int i, r = 0, pos0;
 
@@ -701,8 +701,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
 		output[i].swizzle_z = 2;
 		output[i].swizzle_w = 3;
 		output[i].burst_count = 1;
+		output[i].barrier = 1;
 		output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
 		output[i].array_base = i - pos0;
+		output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
 		switch (ctx.type) {
 		case TGSI_PROCESSOR_VERTEX:
 			if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
@@ -763,8 +765,10 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
 			output[i].swizzle_z = 2;
 			output[i].swizzle_w = 3;
 			output[i].burst_count = 1;
+			output[i].barrier = 1;
 			output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
 			output[i].array_base = 0;
+			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
 			noutput++;
 		}
 	}
@@ -778,10 +782,22 @@ static int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_sh
 		output[0].swizzle_z = 7;
 		output[0].swizzle_w = 7;
 		output[0].burst_count = 1;
+		output[0].barrier = 1;
 		output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
 		output[0].array_base = 0;
+		output[0].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT);
 		noutput++;
 	}
+	/* set export done on last export of each type */
+	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
+		if (i == (noutput - 1)) {
+			output[i].end_of_program = 1;
+		}
+		if (!(output_done & (1 << output[i].type))) {
+			output_done |= (1 << output[i].type);
+			output[i].inst = BC_INST(ctx.bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE);
+		}
+	}
 	/* add output to bytecode */
 	for (i = 0; i < noutput; i++) {
 		r = r600_bc_add_output(ctx.bc, &output[i]);
-- 
cgit v1.2.3


From e9b305c1002c05af0ed60715c8507c407f7febaa Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 19 Mar 2011 12:26:17 +0100
Subject: [g3dvl] merge fixes

---
 src/gallium/include/pipe/p_format.h | 38 ++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index f3b1e5561cf..542931ec1d8 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -196,33 +196,33 @@ enum pipe_format {
    PIPE_FORMAT_R8G8B8X8_UNORM          = 134,
    PIPE_FORMAT_B4G4R4X4_UNORM          = 135,
 
-   PIPE_FORMAT_YV12                  = 136,
-   PIPE_FORMAT_YV16                  = 137,
-   PIPE_FORMAT_IYUV                  = 138,  /**< aka I420 */
-   PIPE_FORMAT_NV12                  = 139,
-   PIPE_FORMAT_NV21                  = 140,
-   PIPE_FORMAT_AYUV                  = PIPE_FORMAT_A8R8G8B8_UNORM,
-   PIPE_FORMAT_VUYA                  = PIPE_FORMAT_B8G8R8A8_UNORM,
-   PIPE_FORMAT_XYUV                  = PIPE_FORMAT_X8R8G8B8_UNORM,
-   PIPE_FORMAT_VUYX                  = PIPE_FORMAT_B8G8R8X8_UNORM,
-   PIPE_FORMAT_IA44                  = 141,
-   PIPE_FORMAT_AI44                  = 142,
-
    /* some stencil samplers formats */
-   PIPE_FORMAT_X24S8_USCALED           = 143,
-   PIPE_FORMAT_S8X24_USCALED           = 144,
-   PIPE_FORMAT_X32_S8X24_USCALED       = 145,
+   PIPE_FORMAT_X24S8_USCALED           = 136,
+   PIPE_FORMAT_S8X24_USCALED           = 137,
+   PIPE_FORMAT_X32_S8X24_USCALED       = 138,
 
-   PIPE_FORMAT_B2G3R3_UNORM            = 146,
-   PIPE_FORMAT_L16A16_UNORM            = 147,
-   PIPE_FORMAT_A16_UNORM               = 148,
-   PIPE_FORMAT_I16_UNORM               = 149,
+   PIPE_FORMAT_B2G3R3_UNORM            = 139,
+   PIPE_FORMAT_L16A16_UNORM            = 140,
+   PIPE_FORMAT_A16_UNORM               = 141,
+   PIPE_FORMAT_I16_UNORM               = 142,
 
    PIPE_FORMAT_LATC1_UNORM             = 143,
    PIPE_FORMAT_LATC1_SNORM             = 144,
    PIPE_FORMAT_LATC2_UNORM             = 145,
    PIPE_FORMAT_LATC2_SNORM             = 146,
 
+   PIPE_FORMAT_YV12                    = 147,
+   PIPE_FORMAT_YV16                    = 148,
+   PIPE_FORMAT_IYUV                    = 149,  /**< aka I420 */
+   PIPE_FORMAT_NV12                    = 150,
+   PIPE_FORMAT_NV21                    = 151,
+   PIPE_FORMAT_AYUV                    = PIPE_FORMAT_A8R8G8B8_UNORM,
+   PIPE_FORMAT_VUYA                    = PIPE_FORMAT_B8G8R8A8_UNORM,
+   PIPE_FORMAT_XYUV                    = PIPE_FORMAT_X8R8G8B8_UNORM,
+   PIPE_FORMAT_VUYX                    = PIPE_FORMAT_B8G8R8X8_UNORM,
+   PIPE_FORMAT_IA44                    = 152,
+   PIPE_FORMAT_AI44                    = 153,
+
    PIPE_FORMAT_COUNT
 };
 
-- 
cgit v1.2.3


From 1a238efe424c666d730ffe91c01f49415797a7ca Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 20 Mar 2011 19:00:50 +0100
Subject: [g3dvl] fix power_of_two buffer width/height handling

---
 src/gallium/state_trackers/xorg/xvmc/surface.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 799dac2efc8..ef7ea920673 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -232,8 +232,14 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
       return BadAlloc;
    }
    template.last_level = 0;
-   template.width0 = util_next_power_of_two(context->width);
-   template.height0 = util_next_power_of_two(context->height);
+   if (vpipe->get_param(vpipe, PIPE_CAP_NPOT_TEXTURES)) {
+      template.width0 = context->width;
+      template.height0 = context->height;
+   }
+   else {
+      template.width0 = util_next_power_of_two(context->width);
+      template.height0 = util_next_power_of_two(context->height);
+   }
    template.depth0 = 1;
    template.array_size = 1;
    template.usage = PIPE_USAGE_DEFAULT;
-- 
cgit v1.2.3


From 713a52d8564193e222e16aad52758d8fa3b79635 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 20 Mar 2011 19:29:47 +0100
Subject: [g3dvl] cleanup idct init

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c | 117 ++++++++++++++++-----------
 1 file changed, 70 insertions(+), 47 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 74893ab4e3a..a79230d0b7f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -258,8 +258,7 @@ vl_mpeg12_create_surface(struct pipe_video_context *vpipe,
 static boolean
 vl_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
                               enum pipe_format format,
-                              unsigned usage,
-                              unsigned geom)
+                              unsigned usage)
 {
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
 
@@ -600,6 +599,51 @@ init_pipe_state(struct vl_mpeg12_context *ctx)
    return true;
 }
 
+static bool
+init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_height)
+{
+   unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
+   struct pipe_resource *idct_matrix;
+
+   /* TODO: Implement 422, 444 */
+   assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+   ctx->empty_block_mask = &const_empty_block_mask_420;
+
+   if (!(idct_matrix = vl_idct_upload_matrix(ctx->pipe)))
+      return false;
+
+   if (!vl_idct_init(&ctx->idct_y, ctx->pipe, buffer_width, buffer_height,
+                     2, 2, TGSI_SWIZZLE_X, idct_matrix))
+      return false;
+
+   if (ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      chroma_width = buffer_width / 2;
+      chroma_height = buffer_height / 2;
+      chroma_blocks_x = 1;
+      chroma_blocks_y = 1;
+   } else if (ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      chroma_width = buffer_width;
+      chroma_height = buffer_height / 2;
+      chroma_blocks_x = 2;
+      chroma_blocks_y = 1;
+   } else {
+      chroma_width = buffer_width;
+      chroma_height = buffer_height;
+      chroma_blocks_x = 2;
+      chroma_blocks_y = 2;
+   }
+
+   if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height,
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
+      return false;
+
+   if(!vl_idct_init(&ctx->idct_cb, ctx->pipe, chroma_width, chroma_height,
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
+      return false;
+
+   return true;
+}
+
 struct pipe_video_context *
 vl_create_mpeg12_context(struct pipe_context *pipe,
                          enum pipe_video_profile profile,
@@ -608,10 +652,8 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
                          bool pot_buffers,
                          enum pipe_format decode_format)
 {
-   struct pipe_resource *idct_matrix;
-   unsigned buffer_width, buffer_height;
-   unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
    struct vl_mpeg12_context *ctx;
+   unsigned buffer_width, buffer_height;
 
    assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
 
@@ -620,12 +662,6 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    if (!ctx)
       return NULL;
 
-   /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
-   assert(pot_buffers);
-
-   buffer_width = pot_buffers ? util_next_power_of_two(width) : width;
-   buffer_height = pot_buffers ? util_next_power_of_two(height) : height;
-
    ctx->base.profile = profile;
    ctx->base.chroma_format = chroma_format;
    ctx->base.width = width;
@@ -666,44 +702,22 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       return NULL;
    }
 
-   /* TODO: Implement 422, 444 */
-   assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-   ctx->empty_block_mask = &const_empty_block_mask_420;
-
-   if (!(idct_matrix = vl_idct_upload_matrix(ctx->pipe)))
-      return false;
-
-   if (!vl_idct_init(&ctx->idct_y, ctx->pipe, buffer_width, buffer_height,
-                     2, 2, TGSI_SWIZZLE_X, idct_matrix))
-      return false;
+   /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
+   assert(pot_buffers);
+   buffer_width = pot_buffers ? util_next_power_of_two(width) : width;
+   buffer_height = pot_buffers ? util_next_power_of_two(height) : height;
 
-   if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      chroma_width = buffer_width / 2;
-      chroma_height = buffer_height / 2;
-      chroma_blocks_x = 1;
-      chroma_blocks_y = 1;
-   } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
-      chroma_width = buffer_width;
-      chroma_height = buffer_height / 2;
-      chroma_blocks_x = 2;
-      chroma_blocks_y = 1;
-   } else {
-      chroma_width = buffer_width;
-      chroma_height = buffer_height;
-      chroma_blocks_x = 2;
-      chroma_blocks_y = 2;
+   if (!init_idct(ctx, buffer_width, buffer_height)) {
+      ctx->pipe->destroy(ctx->pipe);
+      FREE(ctx);
+      return NULL;
    }
 
-   if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
-      return false;
-
-   if(!vl_idct_init(&ctx->idct_cb, ctx->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
-      return false;
-
    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
                                    buffer_width, buffer_height, chroma_format)) {
+      vl_idct_cleanup(&ctx->idct_y);
+      vl_idct_cleanup(&ctx->idct_cr);
+      vl_idct_cleanup(&ctx->idct_cb);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
@@ -711,6 +725,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
 
    ctx->buffer_map = util_new_keymap(sizeof(unsigned), -1, delete_buffer);
    if (!ctx->buffer_map) {
+      vl_idct_cleanup(&ctx->idct_y);
+      vl_idct_cleanup(&ctx->idct_cr);
+      vl_idct_cleanup(&ctx->idct_cb);
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
@@ -718,17 +735,23 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    }
 
    if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) {
-      util_delete_keymap(ctx->buffer_map, ctx);
+      vl_idct_cleanup(&ctx->idct_y);
+      vl_idct_cleanup(&ctx->idct_cr);
+      vl_idct_cleanup(&ctx->idct_cb);
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+      util_delete_keymap(ctx->buffer_map, ctx);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
    }
 
    if (!init_pipe_state(ctx)) {
-      vl_compositor_cleanup(&ctx->compositor);
-      util_delete_keymap(ctx->buffer_map, ctx);
+      vl_idct_cleanup(&ctx->idct_y);
+      vl_idct_cleanup(&ctx->idct_cr);
+      vl_idct_cleanup(&ctx->idct_cb);
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+      util_delete_keymap(ctx->buffer_map, ctx);
+      vl_compositor_cleanup(&ctx->compositor);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
-- 
cgit v1.2.3


From dd6cd206a6395be651bc965580e17c0d63513c7b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 20 Mar 2011 19:45:06 +0100
Subject: [g3dvl] correctly implement non power of two buffers

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c  | 9 ++-------
 src/gallium/auxiliary/vl/vl_mpeg12_context.h  | 5 +++--
 src/gallium/drivers/r600/r600_video_context.c | 2 +-
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index a79230d0b7f..781ff2a4085 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -228,11 +228,7 @@ vl_mpeg12_get_param(struct pipe_video_context *vpipe, int param)
 
    switch (param) {
       case PIPE_CAP_NPOT_TEXTURES:
-         /* XXX: Temporary; not all paths are NPOT-tested */
-#if 0
-         return ctx->pipe->screen->get_param(ctx->pipe->screen, param);
-#endif
-         return FALSE;
+         return !ctx->pot_buffers;
       case PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT:
          return ctx->decode_format;
       default:
@@ -691,6 +687,7 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
 
    ctx->pipe = pipe;
    ctx->decode_format = decode_format;
+   ctx->pot_buffers = pot_buffers;
 
    ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2);
    ctx->vertex_buffer_size = width / MACROBLOCK_WIDTH * height / MACROBLOCK_HEIGHT;
@@ -702,8 +699,6 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       return NULL;
    }
 
-   /* TODO: Non-pot buffers untested, probably doesn't work without changes to texcoord generation, vert shader, etc */
-   assert(pot_buffers);
    buffer_width = pot_buffers ? util_next_power_of_two(width) : width;
    buffer_height = pot_buffers ? util_next_power_of_two(height) : height;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index 6c964fbe92a..c88c436ad73 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -40,6 +40,9 @@ struct vl_mpeg12_context
 {
    struct pipe_video_context base;
    struct pipe_context *pipe;
+   enum pipe_format decode_format;
+   bool pot_buffers;
+
    struct pipe_surface *decode_target;
    const unsigned (*empty_block_mask)[3][2][2];
 
@@ -57,8 +60,6 @@ struct vl_mpeg12_context
    void *rast;
    void *dsa;
    void *blend;
-
-   enum pipe_format decode_format;
 };
 
 struct vl_mpeg12_buffer
diff --git a/src/gallium/drivers/r600/r600_video_context.c b/src/gallium/drivers/r600/r600_video_context.c
index 0b915d62143..8190c9ae612 100644
--- a/src/gallium/drivers/r600/r600_video_context.c
+++ b/src/gallium/drivers/r600/r600_video_context.c
@@ -47,7 +47,7 @@ r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
          return vl_create_mpeg12_context(pipe, profile,
                                          chroma_format,
                                          width, height,
-                                         true,
+                                         false,
                                          PIPE_FORMAT_XYUV);
       default:
          return NULL;
-- 
cgit v1.2.3


From bac8760f7f3523e9b6d5b2fd7cd46091d4883f5e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 20 Mar 2011 21:34:38 +0100
Subject: [g3dvl] rename motion vector fields

---
 src/gallium/auxiliary/vl/vl_vertex_buffers.c   | 64 ++++++++++++--------------
 src/gallium/include/pipe/p_video_state.h       | 12 +++--
 src/gallium/state_trackers/xorg/xvmc/surface.c | 44 +++++++++---------
 3 files changed, 62 insertions(+), 58 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index c5366e8e4fc..928910f572e 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -212,58 +212,54 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
       {
          if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            mv[2].x = mb->pmv[0][1][0];
-            mv[2].y = mb->pmv[0][1][1];
+            mv[2].x = mb->mv[1].top.x;
+            mv[2].y = mb->mv[1].top.y;
 
          } else {
-            mv[2].x = mb->pmv[0][1][0];
-            mv[2].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
+            mv[2].x = mb->mv[1].top.x;
+            mv[2].y = mb->mv[1].top.y - (mb->mv[1].top.y % 4);
 
-            mv[3].x = mb->pmv[1][1][0];
-            mv[3].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
+            mv[3].x = mb->mv[1].bottom.x;
+            mv[3].y = mb->mv[1].bottom.y - (mb->mv[1].bottom.y % 4);
 
-            if(mb->mvfs[0][1]) mv[2].y += 2;
-            if(!mb->mvfs[1][1]) mv[3].y -= 2;
+            if (mb->mv[1].top.field_select) mv[2].y += 2;
+            if (!mb->mv[1].bottom.field_select) mv[3].y -= 2;
          }
 
          /* fall-through */
       }
       case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
       {
-         if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
+         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+            mv[0].x = mb->mv[0].top.x;
+            mv[0].y = mb->mv[0].top.y;
 
-            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-               mv[0].x = mb->pmv[0][1][0];
-               mv[0].y = mb->pmv[0][1][1];
+         } else {
+            mv[0].x = mb->mv[0].top.x;
+            mv[0].y = mb->mv[0].top.y - (mb->mv[0].top.y % 4);
 
-            } else {
-               mv[0].x = mb->pmv[0][1][0];
-               mv[0].y = mb->pmv[0][1][1] - (mb->pmv[0][1][1] % 4);
+            mv[1].x = mb->mv[0].bottom.x;
+            mv[1].y = mb->mv[0].bottom.y - (mb->mv[0].bottom.y % 4);
 
-               mv[1].x = mb->pmv[1][1][0];
-               mv[1].y = mb->pmv[1][1][1] - (mb->pmv[1][1][1] % 4);
+            if (mb->mv[0].top.field_select) mv[0].y += 2;
+            if (!mb->mv[0].bottom.field_select) mv[1].y -= 2;
+         }
+         break;
 
-               if(mb->mvfs[0][1]) mv[0].y += 2;
-               if(!mb->mvfs[1][1]) mv[1].y -= 2;
-            }
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
+         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+            mv[0].x = mb->mv[1].top.x;
+            mv[0].y = mb->mv[1].top.y;
 
          } else {
+            mv[0].x = mb->mv[1].top.x;
+            mv[0].y = mb->mv[1].top.y - (mb->mv[1].top.y % 4);
 
-            if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-               mv[0].x = mb->pmv[0][0][0];
-               mv[0].y = mb->pmv[0][0][1];
-
-            } else {
-               mv[0].x = mb->pmv[0][0][0];
-               mv[0].y = mb->pmv[0][0][1] - (mb->pmv[0][0][1] % 4);
-
-               mv[1].x = mb->pmv[1][0][0];
-               mv[1].y = mb->pmv[1][0][1] - (mb->pmv[1][0][1] % 4);
+            mv[1].x = mb->mv[1].bottom.x;
+            mv[1].y = mb->mv[1].bottom.y - (mb->mv[1].bottom.y % 4);
 
-               if(mb->mvfs[0][0]) mv[0].y += 2;
-               if(!mb->mvfs[1][0]) mv[1].y -= 2;
-            }
+            if (mb->mv[1].top.field_select) mv[0].y += 2;
+            if (!mb->mv[1].bottom.field_select) mv[1].y -= 2;
          }
       }
       default:
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 1cb555b5381..c620472283d 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -74,12 +74,17 @@ enum pipe_mpeg12_dct_type
    PIPE_MPEG12_DCT_TYPE_FRAME
 };
 
-
 struct pipe_macroblock
 {
    enum pipe_video_codec codec;
 };
 
+struct pipe_mpeg12_motionvector
+{
+   signed x, y;
+   bool field_select;
+};
+
 struct pipe_mpeg12_macroblock
 {
    struct pipe_macroblock base;
@@ -89,8 +94,9 @@ struct pipe_mpeg12_macroblock
    enum pipe_mpeg12_macroblock_type mb_type;
    enum pipe_mpeg12_motion_type mo_type;
    enum pipe_mpeg12_dct_type dct_type;
-   signed pmv[2][2][2];
-   bool mvfs[2][2];
+   struct {
+      struct pipe_mpeg12_motionvector top, bottom;
+   } mv[2];
    unsigned cbp;
    short *blocks;
 };
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index ef7ea920673..5896cdca76d 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -153,45 +153,47 @@ MacroBlocksToPipe(struct pipe_screen *screen,
                   const XvMCBlockArray *xvmc_blocks,
                   unsigned int first_macroblock,
                   unsigned int num_macroblocks,
-                  struct pipe_mpeg12_macroblock *pipe_macroblocks)
+                  struct pipe_mpeg12_macroblock *mb)
 {
    unsigned int i, j, k, l;
    XvMCMacroBlock *xvmc_mb;
 
    assert(xvmc_macroblocks);
    assert(xvmc_blocks);
-   assert(pipe_macroblocks);
+   assert(mb);
    assert(num_macroblocks);
 
    xvmc_mb = xvmc_macroblocks->macro_blocks + first_macroblock;
 
    for (i = 0; i < num_macroblocks; ++i) {
-      pipe_macroblocks->base.codec = PIPE_VIDEO_CODEC_MPEG12;
-      pipe_macroblocks->mbx = xvmc_mb->x;
-      pipe_macroblocks->mby = xvmc_mb->y;
-      pipe_macroblocks->mb_type = TypeToPipe(xvmc_mb->macroblock_type);
-      if (pipe_macroblocks->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA)
-         pipe_macroblocks->mo_type = MotionToPipe(xvmc_mb->motion_type, xvmc_picture_structure);
+      mb->base.codec = PIPE_VIDEO_CODEC_MPEG12;
+      mb->mbx = xvmc_mb->x;
+      mb->mby = xvmc_mb->y;
+      mb->mb_type = TypeToPipe(xvmc_mb->macroblock_type);
+      if (mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA)
+         mb->mo_type = MotionToPipe(xvmc_mb->motion_type, xvmc_picture_structure);
       /* Get rid of Valgrind 'undefined' warnings */
       else
-         pipe_macroblocks->mo_type = -1;
-      pipe_macroblocks->dct_type = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ?
+         mb->mo_type = -1;
+      mb->dct_type = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ?
          PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
 
-      for (j = 0; j < 2; ++j)
-         for (k = 0; k < 2; ++k)
-            for (l = 0; l < 2; ++l)
-               pipe_macroblocks->pmv[j][k][l] = xvmc_mb->PMV[j][k][l];
+      for (j = 0; j < 2; ++j) {
+         mb->mv[j].top.x = xvmc_mb->PMV[0][j][0];
+         mb->mv[j].top.y = xvmc_mb->PMV[0][j][1];
+         mb->mv[j].bottom.x = xvmc_mb->PMV[1][j][0];
+         mb->mv[j].bottom.y = xvmc_mb->PMV[1][j][1];
+      }
 
-      pipe_macroblocks->mvfs[0][0] = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_FIRST_FORWARD;
-      pipe_macroblocks->mvfs[0][1] = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_FIRST_BACKWARD;
-      pipe_macroblocks->mvfs[1][0] = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_SECOND_FORWARD;
-      pipe_macroblocks->mvfs[1][1] = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_SECOND_BACKWARD;
+      mb->mv[0].top.field_select = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_FIRST_FORWARD;
+      mb->mv[1].top.field_select = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_FIRST_BACKWARD;
+      mb->mv[0].bottom.field_select = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_SECOND_FORWARD;
+      mb->mv[1].bottom.field_select = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_SECOND_BACKWARD;
 
-      pipe_macroblocks->cbp = xvmc_mb->coded_block_pattern;
-      pipe_macroblocks->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
+      mb->cbp = xvmc_mb->coded_block_pattern;
+      mb->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
 
-      ++pipe_macroblocks;
+      ++mb;
       ++xvmc_mb;
    }
 }
-- 
cgit v1.2.3


From f2c6affa365eee55d2e5a18f889691900711583e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 20 Mar 2011 22:14:49 +0100
Subject: [g3dvl] simplify motion vector calculation

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  8 +--
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     | 70 +++++++++---------------
 2 files changed, 29 insertions(+), 49 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index d45b0642e3c..4f0b9ada73a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -314,6 +314,9 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
       ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
                ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
                tc[1], tc[0]);
+      ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
+               ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+               tc[3], tc[2]);
 
       ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &bi_label);
 
@@ -321,7 +324,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
           * result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 1 : 0])
           */
          ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Z), &label);
-            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[1]);
+            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
          ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
          ureg_ELSE(shader, &label);
             ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
@@ -337,9 +340,6 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
           * else
           *    ref[0..1] = tex(tc[2..3], sampler[0..1])
           */
-         ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-            tc[3], tc[2]);
          ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
          ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 928910f572e..f791d4eca8d 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -208,62 +208,42 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 static void
 get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
 {
-   switch (mb->mb_type) {
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-      {
-         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            mv[2].x = mb->mv[1].top.x;
-            mv[2].y = mb->mv[1].top.y;
+   if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BI ||
+       mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_FWD) {
 
-         } else {
-            mv[2].x = mb->mv[1].top.x;
-            mv[2].y = mb->mv[1].top.y - (mb->mv[1].top.y % 4);
+      if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+         mv[0].x = mb->mv[0].top.x;
+         mv[0].y = mb->mv[0].top.y;
 
-            mv[3].x = mb->mv[1].bottom.x;
-            mv[3].y = mb->mv[1].bottom.y - (mb->mv[1].bottom.y % 4);
+      } else {
+         mv[0].x = mb->mv[0].top.x;
+         mv[0].y = mb->mv[0].top.y - (mb->mv[0].top.y % 4);
 
-            if (mb->mv[1].top.field_select) mv[2].y += 2;
-            if (!mb->mv[1].bottom.field_select) mv[3].y -= 2;
-         }
+         mv[1].x = mb->mv[0].bottom.x;
+         mv[1].y = mb->mv[0].bottom.y - (mb->mv[0].bottom.y % 4);
 
-         /* fall-through */
+         if (mb->mv[0].top.field_select) mv[0].y += 2;
+         if (!mb->mv[0].bottom.field_select) mv[1].y -= 2;
       }
-      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
-      {
-         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            mv[0].x = mb->mv[0].top.x;
-            mv[0].y = mb->mv[0].top.y;
+   }
 
-         } else {
-            mv[0].x = mb->mv[0].top.x;
-            mv[0].y = mb->mv[0].top.y - (mb->mv[0].top.y % 4);
+   if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BI ||
+       mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
 
-            mv[1].x = mb->mv[0].bottom.x;
-            mv[1].y = mb->mv[0].bottom.y - (mb->mv[0].bottom.y % 4);
+      if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+         mv[2].x = mb->mv[1].top.x;
+         mv[2].y = mb->mv[1].top.y;
 
-            if (mb->mv[0].top.field_select) mv[0].y += 2;
-            if (!mb->mv[0].bottom.field_select) mv[1].y -= 2;
-         }
-         break;
+      } else {
+         mv[2].x = mb->mv[1].top.x;
+         mv[2].y = mb->mv[1].top.y - (mb->mv[1].top.y % 4);
 
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-         if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-            mv[0].x = mb->mv[1].top.x;
-            mv[0].y = mb->mv[1].top.y;
-
-         } else {
-            mv[0].x = mb->mv[1].top.x;
-            mv[0].y = mb->mv[1].top.y - (mb->mv[1].top.y % 4);
-
-            mv[1].x = mb->mv[1].bottom.x;
-            mv[1].y = mb->mv[1].bottom.y - (mb->mv[1].bottom.y % 4);
+         mv[3].x = mb->mv[1].bottom.x;
+         mv[3].y = mb->mv[1].bottom.y - (mb->mv[1].bottom.y % 4);
 
-            if (mb->mv[1].top.field_select) mv[0].y += 2;
-            if (!mb->mv[1].bottom.field_select) mv[1].y -= 2;
-         }
+         if (mb->mv[1].top.field_select) mv[2].y += 2;
+         if (!mb->mv[1].bottom.field_select) mv[3].y -= 2;
       }
-      default:
-         break;
    }
 }
 
-- 
cgit v1.2.3


From 52766c2c373a7ac484ae01dca3ab409e808709b2 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 20 Mar 2011 22:50:09 +0100
Subject: [g3dvl] handle different mc types more similarly

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 74 ++++++++----------------
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     | 28 ++++-----
 2 files changed, 37 insertions(+), 65 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 4f0b9ada73a..6cd811b4766 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -106,12 +106,11 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
     * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
     *
     * o_frame_pred = frame_pred
-    * o_info.x = ref_frames
-    * o_info.y = ref_frames > 0
-    * o_info.z = bkwd_pred
+    * o_info.x = not_intra
+    * o_info.y = ref_weight / 2
     *
     * // Apply motion vectors
-    * o_vmv[0..count] = t_vpos + vmv[0..count] * mv_scale
+    * o_vmv[0..3] = t_vpos + vmv[0..3] * mv_scale
     *
     * o_line.xy = vrect * 8
     * o_line.z = interlaced
@@ -152,24 +151,13 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
             eb[1][1], eb[1][0]);
 
    ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_X),
-            ureg_scalar(eb[1][1], TGSI_SWIZZLE_W));
-   ureg_SGE(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Y),
-      ureg_scalar(eb[1][1], TGSI_SWIZZLE_W), ureg_imm1f(shader, 0.0f));
-   ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Z),
             ureg_scalar(eb[1][0], TGSI_SWIZZLE_W));
+   ureg_MUL(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Y),
+            ureg_scalar(eb[1][1], TGSI_SWIZZLE_W),
+            ureg_imm1f(shader, 0.5f));
 
-   ureg_MAD(shader, ureg_writemask(o_vmv[0], TGSI_WRITEMASK_XY), mv_scale, vmv[0], ureg_src(t_vpos));
-   ureg_MAD(shader, ureg_writemask(o_vmv[2], TGSI_WRITEMASK_XY), mv_scale, vmv[2], ureg_src(t_vpos));
-
-   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(eb[0][1], TGSI_SWIZZLE_W)),
-            vmv[0], vmv[1]);
-   ureg_MAD(shader, ureg_writemask(o_vmv[1], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos));
-
-   ureg_CMP(shader, ureg_writemask(t_vmv, TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(eb[0][1], TGSI_SWIZZLE_W)),
-            vmv[2], vmv[3]);
-   ureg_MAD(shader, ureg_writemask(o_vmv[3], TGSI_WRITEMASK_XY), mv_scale, ureg_src(t_vmv), ureg_src(t_vpos));
+   for (i = 0; i < 4; ++i)
+      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
 
    ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
@@ -310,7 +298,14 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 
    ureg_MOV(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f));
 
-   ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Y), &intra_label);
+   ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &intra_label);
+      /*
+       * if (field.z)
+       *    ref[0..1] = tex(tc[0..1], sampler[0..1])
+       * else
+       *    ref[0..1] = tex(tc[2..3], sampler[0..1])
+       * result = LRP(info.y, ref[0..1])
+       */
       ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
                ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
                tc[1], tc[0]);
@@ -318,36 +313,13 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
                ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
                tc[3], tc[2]);
 
-      ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &bi_label);
-
-         /*
-          * result = tex(field.z ? tc[1] : tc[0], sampler[bkwd_pred ? 1 : 0])
-          */
-         ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_Z), &label);
-            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
-         ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-         ureg_ELSE(shader, &label);
-            ureg_TEX(shader, result, TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
-         ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-         ureg_ENDIF(shader);
-
-      ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader));
-      ureg_ELSE(shader, &bi_label);
-
-         /*
-          * if (field.z)
-          *    ref[0..1] = tex(tc[0..1], sampler[0..1])
-          * else
-          *    ref[0..1] = tex(tc[2..3], sampler[0..1])
-          */
-         ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
-         ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
-
-         ureg_LRP(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f),
-            ureg_src(ref[0]), ureg_src(ref[1]));
-
-      ureg_fixup_label(shader, bi_label, ureg_get_instruction_number(shader));
-      ureg_ENDIF(shader);
+      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
+      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
+
+      ureg_LRP(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ),
+               ureg_scalar(info, TGSI_SWIZZLE_Y),
+               ureg_src(ref[1]), ureg_src(ref[0]));
+
    ureg_fixup_label(shader, intra_label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index f791d4eca8d..bbac8902977 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -212,8 +212,8 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
        mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_FWD) {
 
       if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-         mv[0].x = mb->mv[0].top.x;
-         mv[0].y = mb->mv[0].top.y;
+         mv[0].x = mv[1].x = mb->mv[0].top.x;
+         mv[0].y = mv[1].y = mb->mv[0].top.y;
 
       } else {
          mv[0].x = mb->mv[0].top.x;
@@ -225,14 +225,16 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
          if (mb->mv[0].top.field_select) mv[0].y += 2;
          if (!mb->mv[0].bottom.field_select) mv[1].y -= 2;
       }
+   } else {
+      mv[0].x = mv[0].y = mv[1].x = mv[1].y = 0x8000;
    }
 
    if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BI ||
        mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
 
       if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-         mv[2].x = mb->mv[1].top.x;
-         mv[2].y = mb->mv[1].top.y;
+         mv[2].x = mv[3].x = mb->mv[1].top.x;
+         mv[2].y = mv[3].y = mb->mv[1].top.y;
 
       } else {
          mv[2].x = mb->mv[1].top.x;
@@ -244,6 +246,8 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
          if (mb->mv[1].top.field_select) mv[2].y += 2;
          if (!mb->mv[1].bottom.field_select) mv[3].y -= 2;
       }
+   } else {
+      mv[2].x = mv[2].y = mv[3].x = mv[3].y = 0x8000;
    }
 }
 
@@ -274,23 +278,19 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *
    }
    stream->eb[0][0].flag = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
    stream->eb[0][1].flag = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
-   stream->eb[1][0].flag = mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD;
+   stream->eb[1][0].flag = mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA;
    switch (mb->mb_type) {
-      case PIPE_MPEG12_MACROBLOCK_TYPE_INTRA:
-         stream->eb[1][1].flag = -1;
-         break;
-
       case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-         stream->eb[1][1].flag = 1;
+         stream->eb[1][1].flag = 0;
          break;
 
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-         stream->eb[1][1].flag = 0;
+         stream->eb[1][1].flag = 1;
          break;
 
-      default:
-         assert(0);
+      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
+         stream->eb[1][1].flag = 2;
+         break;
    }
 
    get_motion_vectors(mb, stream->mv);
-- 
cgit v1.2.3


From ba0bff85307c9b107cf432da05b39fd4ab242579 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 22 Mar 2011 19:58:21 +0100
Subject: [g3dvl] make video buffer a publicly available interface

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c       | 361 +++++++++++----------
 src/gallium/auxiliary/vl/vl_mpeg12_context.h       |   9 +-
 src/gallium/include/pipe/p_video_context.h         |  44 ++-
 src/gallium/state_trackers/xorg/xvmc/surface.c     |  65 +---
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   2 +-
 5 files changed, 241 insertions(+), 240 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 781ff2a4085..404a6c0c11d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -38,8 +38,6 @@
 #include <util/u_video.h>
 #include <util/u_surface.h>
 
-#define NUM_BUFFERS 2
-
 static const unsigned const_empty_block_mask_420[3][2][2] = {
         { { 0x20, 0x10 },  { 0x08, 0x04 } },
         { { 0x02, 0x02 },  { 0x02, 0x02 } },
@@ -47,108 +45,60 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {
 };
 
 static void
-flush_buffer(struct vl_mpeg12_context *ctx)
+map_buffer(struct vl_mpeg12_buffer *buf)
 {
-   unsigned ne_start, ne_num, e_start, e_num;
-   assert(ctx);
-
-   if(ctx->cur_buffer != NULL) {
-
-      vl_vb_unmap(&ctx->cur_buffer->vertex_stream, ctx->pipe);
-      vl_idct_unmap_buffers(&ctx->idct_y, &ctx->cur_buffer->idct_y);
-      vl_idct_unmap_buffers(&ctx->idct_cr, &ctx->cur_buffer->idct_cr);
-      vl_idct_unmap_buffers(&ctx->idct_cb, &ctx->cur_buffer->idct_cb);
-      vl_vb_restart(&ctx->cur_buffer->vertex_stream,
-		    &ne_start, &ne_num, &e_start, &e_num);
+   struct vl_mpeg12_context *ctx;
+   assert(buf);
 
-      ctx->pipe->set_vertex_buffers(ctx->pipe, 2, ctx->cur_buffer->vertex_bufs.all);
-      ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
-      vl_idct_flush(&ctx->idct_y, &ctx->cur_buffer->idct_y, ne_num);
-      vl_idct_flush(&ctx->idct_cr, &ctx->cur_buffer->idct_cr, ne_num);
-      vl_idct_flush(&ctx->idct_cb, &ctx->cur_buffer->idct_cb, ne_num);
-      vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &ctx->cur_buffer->mc,
-                                  ne_start, ne_num, e_start, e_num);
+   ctx = (struct vl_mpeg12_context *)buf->base.context;
+   assert(ctx);
 
-      ctx->cur_buffer = NULL;
+   if (!buf->mapped) {
+      vl_vb_map(&buf->vertex_stream, ctx->pipe);
+      vl_idct_map_buffers(&ctx->idct_y, &buf->idct_y);
+      vl_idct_map_buffers(&ctx->idct_cr, &buf->idct_cr);
+      vl_idct_map_buffers(&ctx->idct_cb, &buf->idct_cb);
+      buf->mapped = 1;
    }
 }
 
 static void
-rotate_buffer(struct vl_mpeg12_context *ctx)
+unmap_buffer(struct vl_mpeg12_buffer *buf)
 {
-   struct pipe_resource *y, *cr, *cb;
-   static unsigned key = 0;
-   struct vl_mpeg12_buffer *buffer;
+   struct vl_mpeg12_context *ctx;
+   assert(buf);
 
+   ctx = (struct vl_mpeg12_context *)buf->base.context;
    assert(ctx);
 
-   flush_buffer(ctx);
-
-   buffer = (struct vl_mpeg12_buffer*)util_keymap_lookup(ctx->buffer_map, &key);
-   if (!buffer) {
-      boolean added_to_map;
-
-      buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
-      if (buffer == NULL)
-         return;
-
-      buffer->vertex_bufs.individual.quad.stride = ctx->quads.stride;
-      buffer->vertex_bufs.individual.quad.buffer_offset = ctx->quads.buffer_offset;
-      pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, ctx->quads.buffer);
-
-      buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, ctx->pipe,
-                                                         ctx->vertex_buffer_size);
-      if (!(y = vl_idct_init_buffer(&ctx->idct_y, &buffer->idct_y))) {
-         FREE(buffer);
-         return;
-      }
-
-      if (!(cr = vl_idct_init_buffer(&ctx->idct_cr, &buffer->idct_cr))) {
-         FREE(buffer);
-         return;
-      }
-
-      if (!(cb = vl_idct_init_buffer(&ctx->idct_cb, &buffer->idct_cb))) {
-         FREE(buffer);
-         return;
-      }
-
-      if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc, y, cr, cb)) {
-         FREE(buffer);
-         return;
-      }
-
-      added_to_map = util_keymap_insert(ctx->buffer_map, &key, buffer, ctx);
-      assert(added_to_map);
+   if (buf->mapped) {
+      vl_vb_unmap(&buf->vertex_stream, ctx->pipe);
+      vl_idct_unmap_buffers(&ctx->idct_y, &buf->idct_y);
+      vl_idct_unmap_buffers(&ctx->idct_cr, &buf->idct_cr);
+      vl_idct_unmap_buffers(&ctx->idct_cb, &buf->idct_cb);
+      buf->mapped = 0;
    }
-   ++key;
-   key %= NUM_BUFFERS;
-   ctx->cur_buffer = buffer;
-
-   vl_vb_map(&ctx->cur_buffer->vertex_stream, ctx->pipe);
-   vl_idct_map_buffers(&ctx->idct_y, &ctx->cur_buffer->idct_y);
-   vl_idct_map_buffers(&ctx->idct_cr, &ctx->cur_buffer->idct_cr);
-   vl_idct_map_buffers(&ctx->idct_cb, &ctx->cur_buffer->idct_cb);
 }
 
 static void
-delete_buffer(const struct keymap *map,
-              const void *key, void *data,
-              void *user)
+flush_buffer(struct vl_mpeg12_buffer *buf)
 {
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)user;
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)data;
+   unsigned ne_start, ne_num, e_start, e_num;
+   struct vl_mpeg12_context *ctx;
+   assert(buf);
 
-   assert(map);
-   assert(key);
-   assert(data);
-   assert(user);
+   ctx = (struct vl_mpeg12_context *)buf->base.context;
+   assert(ctx);
 
-   vl_vb_cleanup(&buf->vertex_stream);
-   vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y);
-   vl_idct_cleanup_buffer(&ctx->idct_cb, &buf->idct_cb);
-   vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr);
-   vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &buf->mc);
+   vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
+
+   ctx->pipe->set_vertex_buffers(ctx->pipe, 2, buf->vertex_bufs.all);
+   ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
+   vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num);
+   vl_idct_flush(&ctx->idct_cr, &buf->idct_cr, ne_num);
+   vl_idct_flush(&ctx->idct_cb, &buf->idct_cb, ne_num);
+   vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &buf->mc,
+                               ne_start, ne_num, e_start, e_num);
 }
 
 static void
@@ -188,6 +138,71 @@ upload_buffer(struct vl_mpeg12_context *ctx,
    }
 }
 
+static void
+vl_mpeg12_buffer_destroy(struct pipe_video_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)buf->base.context;
+   assert(buf && ctx);
+
+   vl_vb_cleanup(&buf->vertex_stream);
+   vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y);
+   vl_idct_cleanup_buffer(&ctx->idct_cb, &buf->idct_cb);
+   vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr);
+   vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &buf->mc);
+   pipe_surface_reference(&buf->surface, NULL);
+
+   FREE(buf);
+}
+
+static void
+vl_mpeg12_buffer_add_macroblocks(struct pipe_video_buffer *buffer,
+                                 struct pipe_video_buffer *past,
+                                 struct pipe_video_buffer *future,
+                                 unsigned num_macroblocks,
+                                 struct pipe_macroblock *macroblocks,
+                                 struct pipe_fence_handle **fence)
+{
+   struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks;
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_buffer *buf_past = (struct vl_mpeg12_buffer*)past;
+   struct vl_mpeg12_buffer *buf_future = (struct vl_mpeg12_buffer*)future;
+   struct vl_mpeg12_context *ctx;
+   unsigned i;
+
+   assert(buf);
+
+   ctx =  (struct vl_mpeg12_context*)buf->base.context;
+   assert(ctx);
+
+   assert(num_macroblocks);
+   assert(macroblocks);
+   assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
+
+   map_buffer(buf);
+
+   for ( i = 0; i < num_macroblocks; ++i ) {
+      vl_vb_add_block(&buf->vertex_stream, &mpeg12_macroblocks[i], ctx->empty_block_mask);
+      upload_buffer(ctx, buf, &mpeg12_macroblocks[i]);
+   }
+
+   // TODO this doesn't belong here
+   if (buf_past) {
+      unmap_buffer(buf_past);
+      flush_buffer(buf_past);
+   }
+
+   if (buf_future) {
+      unmap_buffer(buf_future);
+      flush_buffer(buf_future);
+   }
+
+   vl_mpeg12_mc_set_surfaces(&ctx->mc_renderer, &buf->mc, buf->surface,
+                             buf_past ? buf_past->surface : NULL,
+                             buf_future ? buf_future->surface : NULL,
+                             fence);
+}
+
 static void
 vl_mpeg12_destroy(struct pipe_video_context *vpipe)
 {
@@ -195,8 +210,6 @@ vl_mpeg12_destroy(struct pipe_video_context *vpipe)
 
    assert(vpipe);
 
-   flush_buffer(ctx);
-
    /* Asserted in softpipe_delete_fs_state() for some reason */
    ctx->pipe->bind_vs_state(ctx->pipe, NULL);
    ctx->pipe->bind_fs_state(ctx->pipe, NULL);
@@ -205,9 +218,7 @@ vl_mpeg12_destroy(struct pipe_video_context *vpipe)
    ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast);
    ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
 
-   pipe_surface_reference(&ctx->decode_target, NULL);
    vl_compositor_cleanup(&ctx->compositor);
-   util_delete_keymap(ctx->buffer_map, ctx);
    vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
    vl_idct_cleanup(&ctx->idct_y);
    vl_idct_cleanup(&ctx->idct_cr);
@@ -246,11 +257,87 @@ vl_mpeg12_create_surface(struct pipe_video_context *vpipe,
 {
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
 
-   assert(vpipe);
+   assert(ctx);
 
    return ctx->pipe->create_surface(ctx->pipe, resource, templat);
 }
 
+static struct pipe_video_buffer *
+vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+   struct pipe_resource *y, *cr, *cb;
+   struct vl_mpeg12_buffer *buffer;
+
+   struct pipe_resource res_template, *resource;
+   struct pipe_surface surf_template;
+
+   assert(ctx);
+
+   buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
+   if (buffer == NULL)
+      return NULL;
+
+   buffer->base.context = vpipe;
+   buffer->base.destroy = vl_mpeg12_buffer_destroy;
+   buffer->base.add_macroblocks = vl_mpeg12_buffer_add_macroblocks;
+
+   memset(&res_template, 0, sizeof(res_template));
+   res_template.target = PIPE_TEXTURE_2D;
+   res_template.format = ctx->decode_format;
+   res_template.last_level = 0;
+   res_template.width0 = ctx->buffer_width;
+   res_template.height0 = ctx->buffer_height;
+   res_template.depth0 = 1;
+   res_template.array_size = 1;
+   res_template.usage = PIPE_USAGE_DEFAULT;
+   res_template.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   res_template.flags = 0;
+   resource = ctx->pipe->screen->resource_create(ctx->pipe->screen, &res_template);
+   if (!resource) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   memset(&surf_template, 0, sizeof(surf_template));
+   surf_template.format = resource->format;
+   surf_template.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   buffer->surface = ctx->pipe->create_surface(ctx->pipe, resource, &surf_template);
+   pipe_resource_reference(&resource, NULL);
+   if (!buffer->surface) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   buffer->vertex_bufs.individual.quad.stride = ctx->quads.stride;
+   buffer->vertex_bufs.individual.quad.buffer_offset = ctx->quads.buffer_offset;
+   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, ctx->quads.buffer);
+
+   buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, ctx->pipe,
+                                                      ctx->vertex_buffer_size);
+   if (!(y = vl_idct_init_buffer(&ctx->idct_y, &buffer->idct_y))) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   if (!(cr = vl_idct_init_buffer(&ctx->idct_cr, &buffer->idct_cr))) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   if (!(cb = vl_idct_init_buffer(&ctx->idct_cb, &buffer->idct_cb))) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc, y, cr, cb)) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   return &buffer->base;
+}
+
 static boolean
 vl_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
                               enum pipe_format format,
@@ -265,35 +352,6 @@ vl_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
                                                  0, usage);
 }
 
-static void
-vl_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
-                             struct pipe_surface *past,
-                             struct pipe_surface *future,
-                             unsigned num_macroblocks,
-                             struct pipe_macroblock *macroblocks,
-                             struct pipe_fence_handle **fence)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-   struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks;
-   unsigned i;
-
-   assert(vpipe);
-   assert(num_macroblocks);
-   assert(macroblocks);
-   assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
-   assert(ctx->decode_target);
-   assert(ctx->cur_buffer);
-
-   for ( i = 0; i < num_macroblocks; ++i ) {
-      vl_vb_add_block(&ctx->cur_buffer->vertex_stream, &mpeg12_macroblocks[i],
-                      ctx->empty_block_mask);
-      upload_buffer(ctx, ctx->cur_buffer, &mpeg12_macroblocks[i]);
-   }
-
-   vl_mpeg12_mc_set_surfaces(&ctx->mc_renderer, &ctx->cur_buffer->mc,
-                             ctx->decode_target, past, future, fence);
-}
-
 static void
 vl_mpeg12_clear_render_target(struct pipe_video_context *vpipe,
                        struct pipe_surface *dst,
@@ -312,6 +370,7 @@ vl_mpeg12_clear_render_target(struct pipe_video_context *vpipe,
       util_clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
 }
 
+#if 0
 static void
 vl_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
                                struct pipe_resource *dst,
@@ -341,6 +400,7 @@ vl_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
                                 dstx, dsty, dstz,
                                 src, 0, &box);
 }
+#endif
 
 static struct pipe_transfer*
 vl_mpeg12_get_transfer(struct pipe_video_context *vpipe,
@@ -432,18 +492,15 @@ vl_mpeg12_transfer_inline_write(struct pipe_video_context *vpipe,
 
 static void
 vl_mpeg12_render_picture(struct pipe_video_context     *vpipe,
-                         struct pipe_surface           *src_surface,
-                         enum pipe_mpeg12_picture_type picture_type,
-                         /*unsigned                    num_past_surfaces,
-                         struct pipe_surface           *past_surfaces,
-                         unsigned                      num_future_surfaces,
-                         struct pipe_surface           *future_surfaces,*/
+                         struct pipe_video_buffer      *src_surface,
                          struct pipe_video_rect        *src_area,
+                         enum pipe_mpeg12_picture_type picture_type,
                          struct pipe_surface           *dst_surface,
                          struct pipe_video_rect        *dst_area,
                          struct pipe_fence_handle      **fence)
 {
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)src_surface;
 
    assert(vpipe);
    assert(src_surface);
@@ -451,10 +508,12 @@ vl_mpeg12_render_picture(struct pipe_video_context     *vpipe,
    assert(dst_surface);
    assert(dst_area);
 
-   flush_buffer(ctx);
+   unmap_buffer(buf);
+   flush_buffer(buf);
 
-   vl_compositor_render(&ctx->compositor, src_surface,
-                        picture_type, src_area, dst_surface, dst_area, fence);
+   vl_compositor_render(&ctx->compositor, buf->surface,
+                        picture_type, src_area,
+                        dst_surface, dst_area, fence);
 }
 
 static void
@@ -487,22 +546,6 @@ vl_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
    vl_compositor_set_layers(&ctx->compositor, layers, src_rects, dst_rects, num_layers);
 }
 
-static void
-vl_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
-                            struct pipe_surface *dt)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(dt);
-
-   if (ctx->decode_target != dt || ctx->cur_buffer == NULL) {
-      rotate_buffer(ctx);
-
-      pipe_surface_reference(&ctx->decode_target, dt);
-   }
-}
-
 static void
 vl_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe, const float *mat)
 {
@@ -649,7 +692,6 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
                          enum pipe_format decode_format)
 {
    struct vl_mpeg12_context *ctx;
-   unsigned buffer_width, buffer_height;
 
    assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
 
@@ -669,10 +711,10 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    ctx->base.get_param = vl_mpeg12_get_param;
    ctx->base.is_format_supported = vl_mpeg12_is_format_supported;
    ctx->base.create_surface = vl_mpeg12_create_surface;
-   ctx->base.decode_macroblocks = vl_mpeg12_decode_macroblocks;
+   ctx->base.create_buffer = vl_mpeg12_create_buffer;
    ctx->base.render_picture = vl_mpeg12_render_picture;
    ctx->base.clear_render_target = vl_mpeg12_clear_render_target;
-   ctx->base.resource_copy_region = vl_mpeg12_resource_copy_region;
+   //ctx->base.resource_copy_region = vl_mpeg12_resource_copy_region;
    ctx->base.get_transfer = vl_mpeg12_get_transfer;
    ctx->base.transfer_destroy = vl_mpeg12_transfer_destroy;
    ctx->base.transfer_map = vl_mpeg12_transfer_map;
@@ -682,7 +724,6 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       ctx->base.transfer_inline_write = vl_mpeg12_transfer_inline_write;
    ctx->base.set_picture_background = vl_mpeg12_set_picture_background;
    ctx->base.set_picture_layers = vl_mpeg12_set_picture_layers;
-   ctx->base.set_decode_target = vl_mpeg12_set_decode_target;
    ctx->base.set_csc_matrix = vl_mpeg12_set_csc_matrix;
 
    ctx->pipe = pipe;
@@ -699,17 +740,18 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       return NULL;
    }
 
-   buffer_width = pot_buffers ? util_next_power_of_two(width) : width;
-   buffer_height = pot_buffers ? util_next_power_of_two(height) : height;
+   ctx->buffer_width = pot_buffers ? util_next_power_of_two(width) : width;
+   ctx->buffer_height = pot_buffers ? util_next_power_of_two(height) : height;
 
-   if (!init_idct(ctx, buffer_width, buffer_height)) {
+   if (!init_idct(ctx, ctx->buffer_width, ctx->buffer_height)) {
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
    }
 
    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
-                                   buffer_width, buffer_height, chroma_format)) {
+                                   ctx->buffer_width, ctx->buffer_height,
+                                   chroma_format)) {
       vl_idct_cleanup(&ctx->idct_y);
       vl_idct_cleanup(&ctx->idct_cr);
       vl_idct_cleanup(&ctx->idct_cb);
@@ -718,23 +760,11 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       return NULL;
    }
 
-   ctx->buffer_map = util_new_keymap(sizeof(unsigned), -1, delete_buffer);
-   if (!ctx->buffer_map) {
-      vl_idct_cleanup(&ctx->idct_y);
-      vl_idct_cleanup(&ctx->idct_cr);
-      vl_idct_cleanup(&ctx->idct_cb);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
-
    if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) {
       vl_idct_cleanup(&ctx->idct_y);
       vl_idct_cleanup(&ctx->idct_cr);
       vl_idct_cleanup(&ctx->idct_cb);
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-      util_delete_keymap(ctx->buffer_map, ctx);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
@@ -745,7 +775,6 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       vl_idct_cleanup(&ctx->idct_cr);
       vl_idct_cleanup(&ctx->idct_cb);
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-      util_delete_keymap(ctx->buffer_map, ctx);
       vl_compositor_cleanup(&ctx->compositor);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index c88c436ad73..b2097c2e227 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -42,8 +42,8 @@ struct vl_mpeg12_context
    struct pipe_context *pipe;
    enum pipe_format decode_format;
    bool pot_buffers;
+   unsigned buffer_width, buffer_height;
 
-   struct pipe_surface *decode_target;
    const unsigned (*empty_block_mask)[3][2][2];
 
    struct pipe_vertex_buffer quads;
@@ -52,9 +52,6 @@ struct vl_mpeg12_context
 
    struct vl_idct idct_y, idct_cr, idct_cb;
    struct vl_mpeg12_mc_renderer mc_renderer;
-
-   struct keymap *buffer_map;
-   struct vl_mpeg12_buffer *cur_buffer;
    struct vl_compositor compositor;
 
    void *rast;
@@ -64,6 +61,10 @@ struct vl_mpeg12_context
 
 struct vl_mpeg12_buffer
 {
+   struct pipe_video_buffer base;
+   struct pipe_surface *surface;
+   bool mapped;
+
    struct vl_vertex_buffer vertex_stream;
 
    union
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 2d59741ec18..08e3c7e340b 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -43,6 +43,7 @@ struct pipe_surface;
 struct pipe_macroblock;
 struct pipe_picture_desc;
 struct pipe_fence_handle;
+struct pipe_video_buffer;
 
 /**
  * Gallium video rendering context
@@ -77,29 +78,26 @@ struct pipe_video_context
                                           struct pipe_resource *resource,
                                           const struct pipe_surface *templat);
 
+   /**
+    * Creates a buffer for use as a decoding target
+    */
+   struct pipe_video_buffer *(*create_buffer)(struct pipe_video_context *vpipe);
+
    /**
     * Picture decoding and displaying
     */
+
+#if 0
    /*@{*/
    void (*decode_bitstream)(struct pipe_video_context *vpipe,
                             unsigned num_bufs,
                             struct pipe_buffer **bitstream_buf);
-
-   void (*decode_macroblocks)(struct pipe_video_context *vpipe,
-                              struct pipe_surface *past,
-                              struct pipe_surface *future,
-                              unsigned num_macroblocks,
-                              struct pipe_macroblock *macroblocks,
-                              struct pipe_fence_handle **fence);
+#endif
 
    void (*render_picture)(struct pipe_video_context     *vpipe,
-                          struct pipe_surface           *src_surface,
-                          enum pipe_mpeg12_picture_type picture_type,
-                          /*unsigned                    num_past_surfaces,
-                          struct pipe_surface           *past_surfaces,
-                          unsigned                      num_future_surfaces,
-                          struct pipe_surface           *future_surfaces,*/
+                          struct pipe_video_buffer      *src_surface,
                           struct pipe_video_rect        *src_area,
+                          enum pipe_mpeg12_picture_type picture_type,
                           struct pipe_surface           *dst_surface,
                           struct pipe_video_rect        *dst_area,
                           struct pipe_fence_handle      **fence);
@@ -110,12 +108,14 @@ struct pipe_video_context
                                const float *rgba,
                                unsigned width, unsigned height);
 
+#if 0
    void (*resource_copy_region)(struct pipe_video_context *vpipe,
                                 struct pipe_resource *dst,
                                 unsigned dstx, unsigned dsty, unsigned dstz,
                                 struct pipe_resource *src,
                                 unsigned srcx, unsigned srcy, unsigned srcz,
                                 unsigned width, unsigned height);
+#endif
 
    struct pipe_transfer *(*get_transfer)(struct pipe_video_context *vpipe,
                                          struct pipe_resource *resource,
@@ -164,15 +164,27 @@ struct pipe_video_context
    void (*set_picture_desc)(struct pipe_video_context *vpipe,
                             const struct pipe_picture_desc *desc);
 
-   void (*set_decode_target)(struct pipe_video_context *vpipe,
-                             struct pipe_surface *dt);
-
    void (*set_csc_matrix)(struct pipe_video_context *vpipe, const float *mat);
 
    /* TODO: Interface for scaling modes, post-processing, etc. */
    /*@}*/
 };
 
+struct pipe_video_buffer
+{
+   struct pipe_video_context* context;
+
+   void (*destroy)(struct pipe_video_buffer *buffer);
+
+   void (*add_macroblocks)(struct pipe_video_buffer *buffer,
+                           struct pipe_video_buffer *past,
+                           struct pipe_video_buffer *future,
+                           unsigned num_macroblocks,
+                           struct pipe_macroblock *macroblocks,
+                           struct pipe_fence_handle **fence);
+
+
+};
 
 #ifdef __cplusplus
 }
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 5896cdca76d..4dedf001ce9 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -155,7 +155,7 @@ MacroBlocksToPipe(struct pipe_screen *screen,
                   unsigned int num_macroblocks,
                   struct pipe_mpeg12_macroblock *mb)
 {
-   unsigned int i, j, k, l;
+   unsigned int i, j;
    XvMCMacroBlock *xvmc_mb;
 
    assert(xvmc_macroblocks);
@@ -204,10 +204,6 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    XvMCContextPrivate *context_priv;
    struct pipe_video_context *vpipe;
    XvMCSurfacePrivate *surface_priv;
-   struct pipe_resource template;
-   struct pipe_resource *vsfc_tex;
-   struct pipe_surface surf_template;
-   struct pipe_surface *vsfc;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Creating surface %p.\n", surface);
 
@@ -225,45 +221,9 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    if (!surface_priv)
       return BadAlloc;
 
-   memset(&template, 0, sizeof(struct pipe_resource));
-   template.target = PIPE_TEXTURE_2D;
-   template.format = (enum pipe_format)vpipe->get_param(vpipe, PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT);
-   if (!vpipe->is_format_supported(vpipe, template.format,
-                                   PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET)) {
-      FREE(surface_priv);
-      return BadAlloc;
-   }
-   template.last_level = 0;
-   if (vpipe->get_param(vpipe, PIPE_CAP_NPOT_TEXTURES)) {
-      template.width0 = context->width;
-      template.height0 = context->height;
-   }
-   else {
-      template.width0 = util_next_power_of_two(context->width);
-      template.height0 = util_next_power_of_two(context->height);
-   }
-   template.depth0 = 1;
-   template.array_size = 1;
-   template.usage = PIPE_USAGE_DEFAULT;
-   template.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   template.flags = 0;
-   vsfc_tex = vpipe->screen->resource_create(vpipe->screen, &template);
-   if (!vsfc_tex) {
-      FREE(surface_priv);
-      return BadAlloc;
-   }
 
-   memset(&surf_template, 0, sizeof(surf_template));
-   surf_template.format = vsfc_tex->format;
-   surf_template.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   vsfc = vpipe->create_surface(vpipe, vsfc_tex, &surf_template);
-   pipe_resource_reference(&vsfc_tex, NULL);
-   if (!vsfc) {
-      FREE(surface_priv);
-      return BadAlloc;
-   }
 
-   surface_priv->pipe_vsfc = vsfc;
+   surface_priv->pipe_buffer = vpipe->create_buffer(vpipe);
    surface_priv->context = context;
 
    surface->surface_id = XAllocID(dpy);
@@ -288,9 +248,9 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
 )
 {
    struct pipe_video_context *vpipe;
-   struct pipe_surface *t_vsfc;
-   struct pipe_surface *p_vsfc;
-   struct pipe_surface *f_vsfc;
+   struct pipe_video_buffer *t_vsfc;
+   struct pipe_video_buffer *p_vsfc;
+   struct pipe_video_buffer *f_vsfc;
    XvMCContextPrivate *context_priv;
    XvMCSurfacePrivate *target_surface_priv;
    XvMCSurfacePrivate *past_surface_priv;
@@ -337,16 +297,15 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    context_priv = context->privData;
    vpipe = context_priv->vctx->vpipe;
 
-   t_vsfc = target_surface_priv->pipe_vsfc;
-   p_vsfc = past_surface ? past_surface_priv->pipe_vsfc : NULL;
-   f_vsfc = future_surface ? future_surface_priv->pipe_vsfc : NULL;
+   t_vsfc = target_surface_priv->pipe_buffer;
+   p_vsfc = past_surface ? past_surface_priv->pipe_buffer : NULL;
+   f_vsfc = future_surface ? future_surface_priv->pipe_buffer : NULL;
 
    MacroBlocksToPipe(vpipe->screen, picture_structure, macroblocks, blocks, first_macroblock,
                      num_macroblocks, pipe_macroblocks);
 
-   vpipe->set_decode_target(vpipe, t_vsfc);
-   vpipe->decode_macroblocks(vpipe, p_vsfc, f_vsfc, num_macroblocks,
-                             &pipe_macroblocks->base, &target_surface_priv->render_fence);
+   t_vsfc->add_macroblocks(t_vsfc, p_vsfc, f_vsfc, num_macroblocks,
+                           &pipe_macroblocks->base, &target_surface_priv->render_fence);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface);
 
@@ -447,7 +406,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    else
       vpipe->set_picture_layers(vpipe, NULL, NULL, NULL, 0);
 
-   vpipe->render_picture(vpipe, surface_priv->pipe_vsfc, PictureToPipe(flags), &src_rect,
+   vpipe->render_picture(vpipe, surface_priv->pipe_buffer, &src_rect, PictureToPipe(flags),
                          drawable_surface, &dst_rect, &surface_priv->disp_fence);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
@@ -506,7 +465,7 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
       return XvMCBadSurface;
 
    surface_priv = surface->privData;
-   pipe_surface_reference(&surface_priv->pipe_vsfc, NULL);
+   surface_priv->pipe_buffer->destroy(surface_priv->pipe_buffer);
    FREE(surface_priv);
    surface->privData = NULL;
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 1e2dfb4223a..01b82df3ba8 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -49,7 +49,7 @@ typedef struct
 
 typedef struct
 {
-   struct pipe_surface *pipe_vsfc;
+   struct pipe_video_buffer *pipe_buffer;
    struct pipe_fence_handle *render_fence;
    struct pipe_fence_handle *disp_fence;
 
-- 
cgit v1.2.3


From f08d3bb59b862e5e176af11303e5068fdfa2100b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 22 Mar 2011 21:52:06 +0100
Subject: [g3dvl] buffers must be aligned to macroblock size

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 404a6c0c11d..73e2a950bd4 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -740,8 +740,8 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       return NULL;
    }
 
-   ctx->buffer_width = pot_buffers ? util_next_power_of_two(width) : width;
-   ctx->buffer_height = pot_buffers ? util_next_power_of_two(height) : height;
+   ctx->buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
+   ctx->buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
 
    if (!init_idct(ctx, ctx->buffer_width, ctx->buffer_height)) {
       ctx->pipe->destroy(ctx->pipe);
-- 
cgit v1.2.3


From f65cdb9ea5652a9b64f4102b046c373b80871577 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 22 Mar 2011 21:58:28 +0100
Subject: [g3dvl] fix configure.ac for r600 video targets

---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index bc548a70360..b16fff74591 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1782,7 +1782,7 @@ AC_ARG_ENABLE([gallium-r600],
     [enable_gallium_r600=auto])
 if test "x$enable_gallium_r600" = xyes; then
     GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r600"
-    gallium_check_st "r600/drm" "dri-r600" "xvmc-r600" "vdpau-r600"
+    gallium_check_st "r600/drm" "dri-r600" "" "xvmc-r600" "vdpau-r600" "va-r600"
 fi
 
 dnl
-- 
cgit v1.2.3


From 884cb79edfefb1133229a002f41b4d370d717a7e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 24 Mar 2011 20:33:32 +0100
Subject: [g3dvl] make mapping and flushing of buffers a public interface

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c       | 151 ++++++++++-----------
 src/gallium/auxiliary/vl/vl_mpeg12_context.h       |   1 -
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c   |  62 ++-------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h   |  16 +--
 src/gallium/include/pipe/p_video_context.h         |  12 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c     |  66 +++++++--
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   5 +
 7 files changed, 156 insertions(+), 157 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 73e2a950bd4..4c0188c6040 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -44,63 +44,6 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {
         { { 0x01, 0x01 },  { 0x01, 0x01 } }
 };
 
-static void
-map_buffer(struct vl_mpeg12_buffer *buf)
-{
-   struct vl_mpeg12_context *ctx;
-   assert(buf);
-
-   ctx = (struct vl_mpeg12_context *)buf->base.context;
-   assert(ctx);
-
-   if (!buf->mapped) {
-      vl_vb_map(&buf->vertex_stream, ctx->pipe);
-      vl_idct_map_buffers(&ctx->idct_y, &buf->idct_y);
-      vl_idct_map_buffers(&ctx->idct_cr, &buf->idct_cr);
-      vl_idct_map_buffers(&ctx->idct_cb, &buf->idct_cb);
-      buf->mapped = 1;
-   }
-}
-
-static void
-unmap_buffer(struct vl_mpeg12_buffer *buf)
-{
-   struct vl_mpeg12_context *ctx;
-   assert(buf);
-
-   ctx = (struct vl_mpeg12_context *)buf->base.context;
-   assert(ctx);
-
-   if (buf->mapped) {
-      vl_vb_unmap(&buf->vertex_stream, ctx->pipe);
-      vl_idct_unmap_buffers(&ctx->idct_y, &buf->idct_y);
-      vl_idct_unmap_buffers(&ctx->idct_cr, &buf->idct_cr);
-      vl_idct_unmap_buffers(&ctx->idct_cb, &buf->idct_cb);
-      buf->mapped = 0;
-   }
-}
-
-static void
-flush_buffer(struct vl_mpeg12_buffer *buf)
-{
-   unsigned ne_start, ne_num, e_start, e_num;
-   struct vl_mpeg12_context *ctx;
-   assert(buf);
-
-   ctx = (struct vl_mpeg12_context *)buf->base.context;
-   assert(ctx);
-
-   vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
-
-   ctx->pipe->set_vertex_buffers(ctx->pipe, 2, buf->vertex_bufs.all);
-   ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
-   vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num);
-   vl_idct_flush(&ctx->idct_cr, &buf->idct_cr, ne_num);
-   vl_idct_flush(&ctx->idct_cb, &buf->idct_cb, ne_num);
-   vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &buf->mc,
-                               ne_start, ne_num, e_start, e_num);
-}
-
 static void
 upload_buffer(struct vl_mpeg12_context *ctx,
               struct vl_mpeg12_buffer *buffer,
@@ -149,24 +92,35 @@ vl_mpeg12_buffer_destroy(struct pipe_video_buffer *buffer)
    vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y);
    vl_idct_cleanup_buffer(&ctx->idct_cb, &buf->idct_cb);
    vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr);
-   vl_mpeg12_mc_cleanup_buffer(&ctx->mc_renderer, &buf->mc);
+   vl_mpeg12_mc_cleanup_buffer(&buf->mc);
    pipe_surface_reference(&buf->surface, NULL);
 
    FREE(buf);
 }
 
+static void
+vl_mpeg12_buffer_map(struct pipe_video_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_context *ctx;
+   assert(buf);
+
+   ctx = (struct vl_mpeg12_context *)buf->base.context;
+   assert(ctx);
+
+   vl_vb_map(&buf->vertex_stream, ctx->pipe);
+   vl_idct_map_buffers(&ctx->idct_y, &buf->idct_y);
+   vl_idct_map_buffers(&ctx->idct_cr, &buf->idct_cr);
+   vl_idct_map_buffers(&ctx->idct_cb, &buf->idct_cb);
+}
+
 static void
 vl_mpeg12_buffer_add_macroblocks(struct pipe_video_buffer *buffer,
-                                 struct pipe_video_buffer *past,
-                                 struct pipe_video_buffer *future,
                                  unsigned num_macroblocks,
-                                 struct pipe_macroblock *macroblocks,
-                                 struct pipe_fence_handle **fence)
+                                 struct pipe_macroblock *macroblocks)
 {
    struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks;
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_buffer *buf_past = (struct vl_mpeg12_buffer*)past;
-   struct vl_mpeg12_buffer *buf_future = (struct vl_mpeg12_buffer*)future;
    struct vl_mpeg12_context *ctx;
    unsigned i;
 
@@ -179,28 +133,61 @@ vl_mpeg12_buffer_add_macroblocks(struct pipe_video_buffer *buffer,
    assert(macroblocks);
    assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
 
-   map_buffer(buf);
-
    for ( i = 0; i < num_macroblocks; ++i ) {
       vl_vb_add_block(&buf->vertex_stream, &mpeg12_macroblocks[i], ctx->empty_block_mask);
       upload_buffer(ctx, buf, &mpeg12_macroblocks[i]);
    }
+}
 
-   // TODO this doesn't belong here
-   if (buf_past) {
-      unmap_buffer(buf_past);
-      flush_buffer(buf_past);
-   }
+static void
+vl_mpeg12_buffer_unmap(struct pipe_video_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_context *ctx;
+   assert(buf);
 
-   if (buf_future) {
-      unmap_buffer(buf_future);
-      flush_buffer(buf_future);
-   }
+   ctx = (struct vl_mpeg12_context *)buf->base.context;
+   assert(ctx);
 
-   vl_mpeg12_mc_set_surfaces(&ctx->mc_renderer, &buf->mc, buf->surface,
-                             buf_past ? buf_past->surface : NULL,
-                             buf_future ? buf_future->surface : NULL,
-                             fence);
+   vl_vb_unmap(&buf->vertex_stream, ctx->pipe);
+   vl_idct_unmap_buffers(&ctx->idct_y, &buf->idct_y);
+   vl_idct_unmap_buffers(&ctx->idct_cr, &buf->idct_cr);
+   vl_idct_unmap_buffers(&ctx->idct_cb, &buf->idct_cb);
+}
+
+static void
+vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
+                       struct pipe_video_buffer *refs[2],
+                       struct pipe_fence_handle **fence)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
+   struct vl_mpeg12_buffer *past = (struct vl_mpeg12_buffer *)refs[0];
+   struct vl_mpeg12_buffer *future = (struct vl_mpeg12_buffer *)refs[1];
+
+   struct pipe_surface *surf_refs[2];
+   unsigned ne_start, ne_num, e_start, e_num;
+   struct vl_mpeg12_context *ctx;
+
+   assert(buf);
+
+   ctx = (struct vl_mpeg12_context *)buf->base.context;
+   assert(ctx);
+
+   vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
+
+   ctx->pipe->set_vertex_buffers(ctx->pipe, 2, buf->vertex_bufs.all);
+   ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
+   vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num);
+   vl_idct_flush(&ctx->idct_cr, &buf->idct_cr, ne_num);
+   vl_idct_flush(&ctx->idct_cb, &buf->idct_cb, ne_num);
+
+   surf_refs[0] = past ? past->surface : NULL;
+   surf_refs[1] = future ? future->surface : NULL;
+
+   vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &buf->mc,
+                               buf->surface, surf_refs,
+                               ne_start, ne_num, e_start, e_num,
+                               fence);
 }
 
 static void
@@ -280,7 +267,10 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
 
    buffer->base.context = vpipe;
    buffer->base.destroy = vl_mpeg12_buffer_destroy;
+   buffer->base.map = vl_mpeg12_buffer_map;
    buffer->base.add_macroblocks = vl_mpeg12_buffer_add_macroblocks;
+   buffer->base.unmap = vl_mpeg12_buffer_unmap;
+   buffer->base.flush = vl_mpeg12_buffer_flush;
 
    memset(&res_template, 0, sizeof(res_template));
    res_template.target = PIPE_TEXTURE_2D;
@@ -508,9 +498,6 @@ vl_mpeg12_render_picture(struct pipe_video_context     *vpipe,
    assert(dst_surface);
    assert(dst_area);
 
-   unmap_buffer(buf);
-   flush_buffer(buf);
-
    vl_compositor_render(&ctx->compositor, buf->surface,
                         picture_type, src_area,
                         dst_surface, dst_area, fence);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index b2097c2e227..7269fa9730b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -63,7 +63,6 @@ struct vl_mpeg12_buffer
 {
    struct pipe_video_buffer base;
    struct pipe_surface *surface;
-   bool mapped;
 
    struct vl_vertex_buffer vertex_stream;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 6cd811b4766..2f2c7870e3a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -282,7 +282,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
    struct ureg_src info;
    struct ureg_src tc[4], sampler[2];
    struct ureg_dst ref[2], result;
-   unsigned i, intra_label, bi_label, label;
+   unsigned i, intra_label;
 
    info = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO, TGSI_INTERPOLATE_CONSTANT);
 
@@ -549,10 +549,6 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
 
    assert(renderer && buffer);
 
-   buffer->surface = NULL;
-   buffer->past = NULL;
-   buffer->future = NULL;
-
    pipe_resource_reference(&buffer->textures.individual.y, y);
    pipe_resource_reference(&buffer->textures.individual.cr, cr);
    pipe_resource_reference(&buffer->textures.individual.cb, cb);
@@ -573,70 +569,45 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
 }
 
 void
-vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer)
+vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
 {
    unsigned i;
 
-   assert(renderer && buffer);
+   assert(buffer);
 
    for (i = 0; i < 3; ++i) {
       pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
       pipe_resource_reference(&buffer->textures.all[i], NULL);
    }
-
-   pipe_surface_reference(&buffer->surface, NULL);
-   pipe_surface_reference(&buffer->past, NULL);
-   pipe_surface_reference(&buffer->future, NULL);
-}
-
-void
-vl_mpeg12_mc_set_surfaces(struct vl_mpeg12_mc_renderer *renderer,
-                          struct vl_mpeg12_mc_buffer *buffer,
-                          struct pipe_surface *surface,
-                          struct pipe_surface *past,
-                          struct pipe_surface *future,
-                          struct pipe_fence_handle **fence)
-{
-   assert(renderer && buffer);
-   assert(surface);
-
-   if (surface != buffer->surface) {
-      pipe_surface_reference(&buffer->surface, surface);
-      pipe_surface_reference(&buffer->past, past);
-      pipe_surface_reference(&buffer->future, future);
-      buffer->fence = fence;
-   } else {
-      /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
-      assert(buffer->past == past);
-      assert(buffer->future == future);
-   }
 }
 
 void
 vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
+                            struct pipe_surface *surface, struct pipe_surface *ref[2],
                             unsigned not_empty_start_instance, unsigned not_empty_num_instances,
-                            unsigned empty_start_instance, unsigned empty_num_instances)
+                            unsigned empty_start_instance, unsigned empty_num_instances,
+                            struct pipe_fence_handle **fence)
 {
    assert(renderer && buffer);
 
    if (not_empty_num_instances == 0 && empty_num_instances == 0)
       return;
 
-   renderer->fb_state.cbufs[0] = buffer->surface;
+   renderer->fb_state.cbufs[0] = surface;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
 
-   if (buffer->past) {
-      buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->past);
+   if (ref[0]) {
+      buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, ref[0]);
    } else {
-      buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, buffer->surface);
+      buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, surface);
    }
 
-   if (buffer->future) {
-      buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->future);
+   if (ref[1]) {
+      buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, ref[1]);
    } else {
-      buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, buffer->surface);
+      buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, surface);
    }
 
    renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, buffer->sampler_views.all);
@@ -653,10 +624,5 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
       util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
                                  empty_start_instance, empty_num_instances);
 
-   renderer->pipe->flush(renderer->pipe, buffer->fence);
-
-   /* Next time we get this surface it may have new ref frames */
-   pipe_surface_reference(&buffer->surface, NULL);
-   pipe_surface_reference(&buffer->past, NULL);
-   pipe_surface_reference(&buffer->future, NULL);
+   renderer->pipe->flush(renderer->pipe, fence);
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index b761961b219..fa81c775131 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -73,9 +73,6 @@ struct vl_mpeg12_mc_buffer
       struct pipe_resource *all[3];
       struct { struct pipe_resource *y, *cb, *cr; } individual;
    } textures;
-
-   struct pipe_surface *surface, *past, *future;
-   struct pipe_fence_handle **fence;
 };
 
 bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
@@ -89,17 +86,12 @@ void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 bool vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
                               struct pipe_resource *y, struct pipe_resource *cr, struct pipe_resource *cb);
 
-void vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer);
-
-void vl_mpeg12_mc_set_surfaces(struct vl_mpeg12_mc_renderer *renderer,
-                               struct vl_mpeg12_mc_buffer *buffer,
-                               struct pipe_surface *surface,
-                               struct pipe_surface *past,
-                               struct pipe_surface *future,
-                               struct pipe_fence_handle **fence);
+void vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer);
 
 void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
+                                 struct pipe_surface *surface, struct pipe_surface *ref[2],
                                  unsigned not_empty_start_instance, unsigned not_empty_num_instances,
-                                 unsigned empty_start_instance, unsigned empty_num_instances);
+                                 unsigned empty_start_instance, unsigned empty_num_instances,
+                                 struct pipe_fence_handle **fence);
 
 #endif /* vl_mpeg12_mc_renderer_h */
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 08e3c7e340b..aa903ddc125 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -176,13 +176,17 @@ struct pipe_video_buffer
 
    void (*destroy)(struct pipe_video_buffer *buffer);
 
+   void (*map)(struct pipe_video_buffer *buffer);
+
    void (*add_macroblocks)(struct pipe_video_buffer *buffer,
-                           struct pipe_video_buffer *past,
-                           struct pipe_video_buffer *future,
                            unsigned num_macroblocks,
-                           struct pipe_macroblock *macroblocks,
-                           struct pipe_fence_handle **fence);
+                           struct pipe_macroblock *macroblocks);
+
+   void (*unmap)(struct pipe_video_buffer *buffer);
 
+   void (*flush)(struct pipe_video_buffer *buffer,
+                 struct pipe_video_buffer *ref_frames[2],
+                 struct pipe_fence_handle **fence);
 
 };
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 4dedf001ce9..bb601929eb3 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -198,6 +198,37 @@ MacroBlocksToPipe(struct pipe_screen *screen,
    }
 }
 
+/* Unmap and flush a surface, after first recursively flushing its reference surfaces. */
+static void
+unmap_and_flush_surface(XvMCSurfacePrivate *surface)
+{
+   struct pipe_video_buffer *ref_frames[2];
+   unsigned i;
+
+   assert(surface);
+
+   /* ref_surfaces[] and ref_frames[] hold exactly two entries (past, future). */
+   for ( i = 0; i < 2; ++i ) {
+      if (surface->ref_surfaces[i]) {
+         XvMCSurfacePrivate *ref = surface->ref_surfaces[i]->privData;
+         assert(ref);
+         unmap_and_flush_surface(ref);
+         surface->ref_surfaces[i] = NULL;
+         ref_frames[i] = ref->pipe_buffer;
+      } else {
+         ref_frames[i] = NULL;
+      }
+   }
+
+   if (surface->mapped) {
+      surface->pipe_buffer->unmap(surface->pipe_buffer);
+      surface->pipe_buffer->flush(surface->pipe_buffer,
+                                  ref_frames,
+                                  &surface->flush_fence);
+      surface->mapped = 0;
+   }
+}
+
 PUBLIC
 Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surface)
 {
@@ -221,8 +252,6 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    if (!surface_priv)
       return BadAlloc;
 
-
-
    surface_priv->pipe_buffer = vpipe->create_buffer(vpipe);
    surface_priv->context = context;
 
@@ -248,9 +277,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
 )
 {
    struct pipe_video_context *vpipe;
-   struct pipe_video_buffer *t_vsfc;
-   struct pipe_video_buffer *p_vsfc;
-   struct pipe_video_buffer *f_vsfc;
+   struct pipe_video_buffer *t_buffer;
    XvMCContextPrivate *context_priv;
    XvMCSurfacePrivate *target_surface_priv;
    XvMCSurfacePrivate *past_surface_priv;
@@ -297,15 +324,30 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    context_priv = context->privData;
    vpipe = context_priv->vctx->vpipe;
 
-   t_vsfc = target_surface_priv->pipe_buffer;
-   p_vsfc = past_surface ? past_surface_priv->pipe_buffer : NULL;
-   f_vsfc = future_surface ? future_surface_priv->pipe_buffer : NULL;
+   t_buffer = target_surface_priv->pipe_buffer;
+
+   // ensure that all reference frames are flushed
+   // not really necessary, but speeds up rendering
+   if (past_surface)
+      unmap_and_flush_surface(past_surface->privData);
+
+   if (future_surface)
+      unmap_and_flush_surface(future_surface->privData);
 
    MacroBlocksToPipe(vpipe->screen, picture_structure, macroblocks, blocks, first_macroblock,
                      num_macroblocks, pipe_macroblocks);
 
-   t_vsfc->add_macroblocks(t_vsfc, p_vsfc, f_vsfc, num_macroblocks,
-                           &pipe_macroblocks->base, &target_surface_priv->render_fence);
+   if (!target_surface_priv->mapped) {
+      t_buffer->map(t_buffer);
+      target_surface_priv->ref_surfaces[0] = past_surface;
+      target_surface_priv->ref_surfaces[1] = future_surface;
+      target_surface_priv->mapped = 1;
+   } else {
+      /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
+      assert(target_surface_priv->ref_surfaces[0] == past_surface);
+      assert(target_surface_priv->ref_surfaces[1] == future_surface);
+   }
+   t_buffer->add_macroblocks(t_buffer, num_macroblocks, &pipe_macroblocks->base);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface);
 
@@ -320,6 +362,9 @@ Status XvMCFlushSurface(Display *dpy, XvMCSurface *surface)
    if (!surface)
       return XvMCBadSurface;
 
+   // don't call flush here, because this is usually
+   // called once for every slice instead of every frame
+
    return Success;
 }
 
@@ -406,6 +451,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    else
       vpipe->set_picture_layers(vpipe, NULL, NULL, NULL, 0);
 
+   unmap_and_flush_surface(surface_priv);
    vpipe->render_picture(vpipe, surface_priv->pipe_buffer, &src_rect, PictureToPipe(flags),
                          drawable_surface, &dst_rect, &surface_priv->disp_fence);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 01b82df3ba8..26be1f7b846 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -50,6 +50,11 @@ typedef struct
 typedef struct
 {
    struct pipe_video_buffer *pipe_buffer;
+   bool mapped; // are we still mapped to memory?
+
+   XvMCSurface *ref_surfaces[2];
+
+   struct pipe_fence_handle *flush_fence;
    struct pipe_fence_handle *render_fence;
    struct pipe_fence_handle *disp_fence;
 
-- 
cgit v1.2.3


From da3c6dd099786d20906b5a16288887b80cd8ad29 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 24 Mar 2011 21:24:58 +0100
Subject: [g3dvl] move sampler views for reference frames into context

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c     | 21 ++++--
 src/gallium/auxiliary/vl/vl_mpeg12_context.h     |  1 +
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 88 ++++++++----------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  4 +-
 4 files changed, 46 insertions(+), 68 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 4c0188c6040..39429df68b7 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -37,6 +37,7 @@
 #include <util/u_rect.h>
 #include <util/u_video.h>
 #include <util/u_surface.h>
+#include <util/u_sampler.h>
 
 static const unsigned const_empty_block_mask_420[3][2][2] = {
         { { 0x20, 0x10 },  { 0x08, 0x04 } },
@@ -94,6 +95,7 @@ vl_mpeg12_buffer_destroy(struct pipe_video_buffer *buffer)
    vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr);
    vl_mpeg12_mc_cleanup_buffer(&buf->mc);
    pipe_surface_reference(&buf->surface, NULL);
+   pipe_sampler_view_reference(&buf->sampler_view, NULL);
 
    FREE(buf);
 }
@@ -164,7 +166,7 @@ vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
    struct vl_mpeg12_buffer *past = (struct vl_mpeg12_buffer *)refs[0];
    struct vl_mpeg12_buffer *future = (struct vl_mpeg12_buffer *)refs[1];
 
-   struct pipe_surface *surf_refs[2];
+   struct pipe_sampler_view *sv_refs[2];
    unsigned ne_start, ne_num, e_start, e_num;
    struct vl_mpeg12_context *ctx;
 
@@ -181,11 +183,11 @@ vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
    vl_idct_flush(&ctx->idct_cr, &buf->idct_cr, ne_num);
    vl_idct_flush(&ctx->idct_cb, &buf->idct_cb, ne_num);
 
-   surf_refs[0] = past ? past->surface : NULL;
-   surf_refs[1] = future ? future->surface : NULL;
+   sv_refs[0] = past ? past->sampler_view : NULL;
+   sv_refs[1] = future ? future->sampler_view : NULL;
 
    vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &buf->mc,
-                               buf->surface, surf_refs,
+                               buf->surface, sv_refs,
                                ne_start, ne_num, e_start, e_num,
                                fence);
 }
@@ -258,6 +260,7 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
 
    struct pipe_resource res_template, *resource;
    struct pipe_surface surf_template;
+   struct pipe_sampler_view sv_template;
 
    assert(ctx);
 
@@ -293,12 +296,20 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
    surf_template.format = resource->format;
    surf_template.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
    buffer->surface = ctx->pipe->create_surface(ctx->pipe, resource, &surf_template);
-   pipe_resource_reference(&resource, NULL);
    if (!buffer->surface) {
       FREE(buffer);
       return NULL;
    }
 
+   u_sampler_view_default_template(&sv_template, resource, resource->format);
+   buffer->sampler_view = ctx->pipe->create_sampler_view(ctx->pipe, resource, &sv_template);
+   if (!buffer->sampler_view) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   pipe_resource_reference(&resource, NULL);
+
    buffer->vertex_bufs.individual.quad.stride = ctx->quads.stride;
    buffer->vertex_bufs.individual.quad.buffer_offset = ctx->quads.buffer_offset;
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, ctx->quads.buffer);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index 7269fa9730b..e4236adcec3 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -63,6 +63,7 @@ struct vl_mpeg12_buffer
 {
    struct pipe_video_buffer base;
    struct pipe_surface *surface;
+   struct pipe_sampler_view *sampler_view;
 
    struct vl_vertex_buffer vertex_stream;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 2f2c7870e3a..218ff5d1c14 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -444,48 +444,6 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
    r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
 }
 
-static struct pipe_sampler_view
-*find_or_create_sampler_view(struct vl_mpeg12_mc_renderer *r, struct pipe_surface *surface)
-{
-   struct pipe_sampler_view *sampler_view;
-   assert(r);
-   assert(surface);
-
-   sampler_view = (struct pipe_sampler_view*)util_keymap_lookup(r->texview_map, &surface);
-   if (!sampler_view) {
-      struct pipe_sampler_view templat;
-      boolean added_to_map;
-
-      u_sampler_view_default_template(&templat, surface->texture,
-                                      surface->texture->format);
-      sampler_view = r->pipe->create_sampler_view(r->pipe, surface->texture,
-                                                  &templat);
-      if (!sampler_view)
-         return NULL;
-
-      added_to_map = util_keymap_insert(r->texview_map, &surface,
-                                        sampler_view, r->pipe);
-      assert(added_to_map);
-   }
-
-   return sampler_view;
-}
-
-static void
-texview_map_delete(const struct keymap *map,
-                   const void *key, void *data,
-                   void *user)
-{
-   struct pipe_sampler_view *sv = (struct pipe_sampler_view*)data;
-
-   assert(map);
-   assert(key);
-   assert(data);
-   assert(user);
-
-   pipe_sampler_view_reference(&sv, NULL);
-}
-
 bool
 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            struct pipe_context *pipe,
@@ -493,6 +451,9 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            unsigned buffer_height,
                            enum pipe_video_chroma_format chroma_format)
 {
+   struct pipe_resource tex_templ, *tex_dummy;
+   struct pipe_sampler_view sampler_view;
+
    assert(renderer);
    assert(pipe);
 
@@ -503,11 +464,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->buffer_height = buffer_height;
    renderer->chroma_format = chroma_format;
 
-   renderer->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
-                                           texview_map_delete);
-   if (!renderer->texview_map)
-      return false;
-
    if (!init_pipe_state(renderer))
       goto error_pipe_state;
 
@@ -517,13 +473,30 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (renderer->vs == NULL || renderer->fs == NULL)
       goto error_shaders;
 
+   /* create a dummy sampler */
+   memset(&tex_templ, 0, sizeof(tex_templ));
+   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
+   tex_templ.flags = 0;
+
+   tex_templ.target = PIPE_TEXTURE_2D;
+   tex_templ.format = PIPE_FORMAT_R8_SNORM;
+   tex_templ.width0 = 1;
+   tex_templ.height0 = 1;
+   tex_templ.depth0 = 1;
+   tex_templ.array_size = 1;
+   tex_templ.last_level = 0;
+   tex_templ.usage = PIPE_USAGE_STATIC;
+   tex_dummy = pipe->screen->resource_create(pipe->screen, &tex_templ);
+
+   u_sampler_view_default_template(&sampler_view, tex_dummy, tex_dummy->format);
+   renderer->dummy = pipe->create_sampler_view(pipe, tex_dummy, &sampler_view);
+
    return true;
 
 error_shaders:
    cleanup_pipe_state(renderer);
 
 error_pipe_state:
-   util_delete_keymap(renderer->texview_map, renderer->pipe);
    return false;
 }
 
@@ -532,7 +505,8 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 {
    assert(renderer);
 
-   util_delete_keymap(renderer->texview_map, renderer->pipe);
+   pipe_sampler_view_reference(&renderer->dummy, NULL);
+
    cleanup_pipe_state(renderer);
 
    renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs);
@@ -583,7 +557,7 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
 
 void
 vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                            struct pipe_surface *surface, struct pipe_surface *ref[2],
+                            struct pipe_surface *surface, struct pipe_sampler_view *ref[2],
                             unsigned not_empty_start_instance, unsigned not_empty_num_instances,
                             unsigned empty_start_instance, unsigned empty_num_instances,
                             struct pipe_fence_handle **fence)
@@ -598,17 +572,9 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
 
-   if (ref[0]) {
-      buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, ref[0]);
-   } else {
-      buffer->sampler_views.individual.ref[0] = find_or_create_sampler_view(renderer, surface);
-   }
-
-   if (ref[1]) {
-      buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, ref[1]);
-   } else {
-      buffer->sampler_views.individual.ref[1] = find_or_create_sampler_view(renderer, surface);
-   }
+   /* if no reference frame provided use a dummy sampler instead */
+   buffer->sampler_views.individual.ref[0] = ref[0] ? ref[0] : renderer->dummy;
+   buffer->sampler_views.individual.ref[1] = ref[1] ? ref[1] : renderer->dummy;
 
    renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, buffer->sampler_views.all);
    renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 5, renderer->samplers.all);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index fa81c775131..29a548ef70d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -57,7 +57,7 @@ struct vl_mpeg12_mc_renderer
       struct { void *y, *cb, *cr, *ref[2]; } individual;
    } samplers;
 
-   struct keymap *texview_map;
+   struct pipe_sampler_view *dummy;
 };
 
 struct vl_mpeg12_mc_buffer
@@ -89,7 +89,7 @@ bool vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_
 void vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer);
 
 void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                                 struct pipe_surface *surface, struct pipe_surface *ref[2],
+                                 struct pipe_surface *surface, struct pipe_sampler_view *ref[2],
                                  unsigned not_empty_start_instance, unsigned not_empty_num_instances,
                                  unsigned empty_start_instance, unsigned empty_num_instances,
                                  struct pipe_fence_handle **fence);
-- 
cgit v1.2.3


From ce6f8331fa520bc464a9fa50c18fe57678dd0a24 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 25 Mar 2011 19:32:22 +0100
Subject: [g3dvl] make ref_surface handling more sane

---
 src/gallium/state_trackers/xorg/xvmc/surface.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index bb601929eb3..03301238883 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -334,6 +334,15 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    if (future_surface)
       unmap_and_flush_surface(future_surface->privData);
 
+   /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
+   if (target_surface_priv->mapped && (
+       target_surface_priv->ref_surfaces[0] != past_surface ||
+       target_surface_priv->ref_surfaces[1] != future_surface)) {
+
+      // If they change anyway we need to flush our surface
+      unmap_and_flush_surface(target_surface_priv);
+   }
+
    MacroBlocksToPipe(vpipe->screen, picture_structure, macroblocks, blocks, first_macroblock,
                      num_macroblocks, pipe_macroblocks);
 
@@ -342,11 +351,8 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
       target_surface_priv->ref_surfaces[0] = past_surface;
       target_surface_priv->ref_surfaces[1] = future_surface;
       target_surface_priv->mapped = 1;
-   } else {
-      /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
-      assert(target_surface_priv->ref_surfaces[0] == past_surface);
-      assert(target_surface_priv->ref_surfaces[1] == future_surface);
    }
+
    t_buffer->add_macroblocks(t_buffer, num_macroblocks, &pipe_macroblocks->base);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface);
-- 
cgit v1.2.3


From a17788ac490744b631fc7dd47e94af3296895701 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 25 Mar 2011 21:10:56 +0100
Subject: [g3dvl] start implementing AI44 and IA44 subpicture

---
 src/gallium/state_trackers/xorg/xvmc/subpicture.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 78de154bdd7..548be8bed60 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -44,6 +44,11 @@ static enum pipe_format XvIDToPipe(int xvimage_id)
    switch (xvimage_id) {
       case FOURCC_RGB:
          return PIPE_FORMAT_B8G8R8X8_UNORM;
+
+      case FOURCC_AI44:
+      case FOURCC_IA44:
+         return PIPE_FORMAT_L4A4_UNORM;
+
       default:
          XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", xvimage_id);
          return PIPE_FORMAT_NONE;
@@ -56,6 +61,7 @@ static int PipeToComponentOrder(enum pipe_format format, char *component_order)
 
    switch (format) {
       case PIPE_FORMAT_B8G8R8X8_UNORM:
+      case PIPE_FORMAT_L4A4_UNORM:
          return 0;
       default:
          XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized PIPE_FORMAT 0x%08X.\n", format);
@@ -64,8 +70,7 @@ static int PipeToComponentOrder(enum pipe_format format, char *component_order)
          component_order[2] = 0;
          component_order[3] = 0;
    }
-
-      return 0;
+   return 0;
 }
 
 static Status Validate(Display *dpy, XvPortID port, int surface_type_id, int xvimage_id)
@@ -101,7 +106,8 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id, int xvi
                                  "[XvMC]   red mask=0x%08X\n" \
                                  "[XvMC]   green mask=0x%08X\n" \
                                  "[XvMC]   blue mask=0x%08X\n",
-                                 subpictures[i].depth, subpictures[i].red_mask, subpictures[i].green_mask, subpictures[i].blue_mask);
+                                 subpictures[i].depth, subpictures[i].red_mask,
+                                 subpictures[i].green_mask, subpictures[i].blue_mask);
          }
          else if (subpictures[i].type == XvYUV) {
             XVMC_MSG(XVMC_TRACE, "[XvMC]   y sample bits=0x%08X\n" \
@@ -226,17 +232,15 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh
 
    /* Convert color to float */
    util_format_read_4f(PIPE_FORMAT_B8G8R8A8_UNORM,
-                    color_f, 1,
-                    &color, 4,
-                    0, 0, 1, 1);
+                       color_f, 1, &color, 4,
+                       0, 0, 1, 1);
 
    subpicture_priv = subpicture->privData;
    context_priv = subpicture_priv->context->privData;
    /* TODO: Assert clear rect is within bounds? Or clip? */
    context_priv->vctx->vpipe->clear_render_target(context_priv->vctx->vpipe,
-                                           subpicture_priv->sfc, x, y,
-										   color_f,
-                                           width, height);
+                                                  subpicture_priv->sfc, x, y,
+                                                  color_f, width, height);
 
    return Success;
 }
-- 
cgit v1.2.3


From 4a0b80f00dbc77d333027afd195daae7ef1e651c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 25 Mar 2011 23:38:50 +0100
Subject: [g3dvl] rework supicture handling

This gets ia44 and ai44 at least partial working
---
 src/gallium/auxiliary/vl/vl_compositor.c           |  34 +++--
 src/gallium/auxiliary/vl/vl_compositor.h           |  14 +--
 src/gallium/auxiliary/vl/vl_mpeg12_context.c       | 138 ++++++++++++++-------
 src/gallium/include/pipe/p_video_context.h         |  49 +++++---
 src/gallium/state_trackers/xorg/xvmc/subpicture.c  |  76 +++++-------
 src/gallium/state_trackers/xorg/xvmc/surface.c     |   2 +-
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   4 +-
 7 files changed, 178 insertions(+), 139 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index c54e5476f3a..cef8b6992ab 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -364,6 +364,7 @@ void vl_compositor_cleanup(struct vl_compositor *compositor)
    cleanup_pipe_state(compositor);
 }
 
+#if 0
 void vl_compositor_set_background(struct vl_compositor *compositor,
                                  struct pipe_surface *bg, struct pipe_video_rect *bg_src_rect)
 {
@@ -378,9 +379,10 @@ void vl_compositor_set_background(struct vl_compositor *compositor,
       compositor->dirty_bg = true;
    }
 }
+#endif
 
 void vl_compositor_set_layers(struct vl_compositor *compositor,
-                              struct pipe_surface *layers[],
+                              struct pipe_sampler_view *layers[],
                               struct pipe_video_rect *src_rects[],
                               struct pipe_video_rect *dst_rects[],
                               unsigned num_layers)
@@ -399,11 +401,9 @@ void vl_compositor_set_layers(struct vl_compositor *compositor,
           !u_video_rects_equal(&compositor->layer_src_rects[i], src_rects[i]) ||
           !u_video_rects_equal(&compositor->layer_dst_rects[i], dst_rects[i]))
       {
-         pipe_surface_reference(&compositor->layers[i], layers[i]);
-         /*if (!u_video_rects_equal(&compositor->layer_src_rects[i], src_rects[i]))*/
-            compositor->layer_src_rects[i] = *src_rects[i];
-         /*if (!u_video_rects_equal(&compositor->layer_dst_rects[i], dst_rects[i]))*/
-            compositor->layer_dst_rects[i] = *dst_rects[i];
+         pipe_sampler_view_reference(&compositor->layers[i], layers[i]);
+         compositor->layer_src_rects[i] = *src_rects[i];
+         compositor->layer_dst_rects[i] = *dst_rects[i];
          compositor->dirty_layers |= 1 << i;
       }
 
@@ -412,7 +412,7 @@ void vl_compositor_set_layers(struct vl_compositor *compositor,
    }
 
    for (; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
-      pipe_surface_reference(&compositor->layers[i], NULL);
+      pipe_sampler_view_reference(&compositor->layers[i], NULL);
 }
 
 static void gen_rect_verts(unsigned pos,
@@ -460,10 +460,10 @@ static void gen_rect_verts(unsigned pos,
 }
 
 static unsigned gen_data(struct vl_compositor *c,
-                         struct pipe_surface *src_surface,
+                         struct pipe_sampler_view *src_surface,
                          struct pipe_video_rect *src_rect,
                          struct pipe_video_rect *dst_rect,
-                         struct pipe_surface **textures,
+                         struct pipe_sampler_view **textures,
                          void **frag_shaders)
 {
    void *vb;
@@ -485,7 +485,7 @@ static unsigned gen_data(struct vl_compositor *c,
       return 0;
 
    if (c->dirty_bg) {
-      struct vertex2f bg_inv_size = {1.0f / c->bg->width, 1.0f / c->bg->height};
+      struct vertex2f bg_inv_size = {1.0f / c->bg->texture->width0, 1.0f / c->bg->texture->height0};
       gen_rect_verts(num_rects, &c->bg_src_rect, &bg_inv_size, NULL, NULL, vb);
       textures[num_rects] = c->bg;
       /* XXX: Hack */
@@ -495,7 +495,7 @@ static unsigned gen_data(struct vl_compositor *c,
    }
 
    {
-      struct vertex2f src_inv_size = { 1.0f / src_surface->width, 1.0f / src_surface->height};
+      struct vertex2f src_inv_size = { 1.0f / src_surface->texture->width0, 1.0f / src_surface->texture->height0};
       gen_rect_verts(num_rects, src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
       textures[num_rects] = src_surface;
       /* XXX: Hack, sort of */
@@ -507,7 +507,7 @@ static unsigned gen_data(struct vl_compositor *c,
       assert(i < VL_COMPOSITOR_MAX_LAYERS);
 
       if (c->dirty_layers & (1 << i)) {
-         struct vertex2f layer_inv_size = {1.0f / c->layers[i]->width, 1.0f / c->layers[i]->height};
+         struct vertex2f layer_inv_size = {1.0f / c->layers[i]->texture->width0, 1.0f / c->layers[i]->texture->height0};
          gen_rect_verts(num_rects, &c->layer_src_rects[i], &layer_inv_size,
                         &c->layer_dst_rects[i], &c->fb_inv_size, vb);
          textures[num_rects] = c->layers[i];
@@ -524,12 +524,12 @@ static unsigned gen_data(struct vl_compositor *c,
 }
 
 static void draw_layers(struct vl_compositor *c,
-                        struct pipe_surface *src_surface,
+                        struct pipe_sampler_view *src_surface,
                         struct pipe_video_rect *src_rect,
                         struct pipe_video_rect *dst_rect)
 {
    unsigned num_rects;
-   struct pipe_surface *src_surfaces[VL_COMPOSITOR_MAX_LAYERS + 2];
+   struct pipe_sampler_view *src_surfaces[VL_COMPOSITOR_MAX_LAYERS + 2];
    void *frag_shaders[VL_COMPOSITOR_MAX_LAYERS + 2];
    unsigned i;
 
@@ -569,12 +569,8 @@ static void draw_layers(struct vl_compositor *c,
 }
 
 void vl_compositor_render(struct vl_compositor          *compositor,
-                          struct pipe_surface           *src_surface,
+                          struct pipe_sampler_view      *src_surface,
                           enum pipe_mpeg12_picture_type picture_type,
-                          /*unsigned                    num_past_surfaces,
-                          struct pipe_surface           *past_surfaces,
-                          unsigned                      num_future_surfaces,
-                          struct pipe_surface           *future_surfaces,*/
                           struct pipe_video_rect        *src_area,
                           struct pipe_surface           *dst_surface,
                           struct pipe_video_rect        *dst_area,
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 820c9ef6ddb..c2de98de0ef 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -57,10 +57,10 @@ struct vl_compositor
    void *vertex_elems_state;
    struct pipe_resource *fs_const_buf;
 
-   struct pipe_surface *bg;
+   struct pipe_sampler_view *bg;
    struct pipe_video_rect bg_src_rect;
    bool dirty_bg;
-   struct pipe_surface *layers[VL_COMPOSITOR_MAX_LAYERS];
+   struct pipe_sampler_view *layers[VL_COMPOSITOR_MAX_LAYERS];
    struct pipe_video_rect layer_src_rects[VL_COMPOSITOR_MAX_LAYERS];
    struct pipe_video_rect layer_dst_rects[VL_COMPOSITOR_MAX_LAYERS];
    unsigned dirty_layers;
@@ -72,22 +72,20 @@ bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *p
 
 void vl_compositor_cleanup(struct vl_compositor *compositor);
 
+#if 0
 void vl_compositor_set_background(struct vl_compositor *compositor,
                                   struct pipe_surface *bg, struct pipe_video_rect *bg_src_rect);
+#endif
 
 void vl_compositor_set_layers(struct vl_compositor *compositor,
-                              struct pipe_surface *layers[],
+                              struct pipe_sampler_view *layers[],
                               struct pipe_video_rect *src_rects[],
                               struct pipe_video_rect *dst_rects[],
                               unsigned num_layers);
 
 void vl_compositor_render(struct vl_compositor          *compositor,
-                          struct pipe_surface           *src_surface,
+                          struct pipe_sampler_view      *src_surface,
                           enum pipe_mpeg12_picture_type picture_type,
-                          /*unsigned                    num_past_surfaces,
-                          struct pipe_surface           *past_surfaces,
-                          unsigned                      num_future_surfaces,
-                          struct pipe_surface           *future_surfaces,*/
                           struct pipe_video_rect        *src_area,
                           struct pipe_surface           *dst_surface,
                           struct pipe_video_rect        *dst_area,
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 39429df68b7..185380ed216 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -242,13 +242,25 @@ vl_mpeg12_get_param(struct pipe_video_context *vpipe, int param)
 static struct pipe_surface *
 vl_mpeg12_create_surface(struct pipe_video_context *vpipe,
                          struct pipe_resource *resource,
-                         const struct pipe_surface *templat)
+                         const struct pipe_surface *templ)
 {
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
 
    assert(ctx);
 
-   return ctx->pipe->create_surface(ctx->pipe, resource, templat);
+   return ctx->pipe->create_surface(ctx->pipe, resource, templ);
+}
+
+static struct pipe_sampler_view *
+vl_mpeg12_create_sampler_view(struct pipe_video_context *vpipe,
+                              struct pipe_resource *resource,
+                              const struct pipe_sampler_view *templ)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+
+   assert(ctx);
+
+   return ctx->pipe->create_sampler_view(ctx->pipe, resource, templ);
 }
 
 static struct pipe_video_buffer *
@@ -353,24 +365,6 @@ vl_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
                                                  0, usage);
 }
 
-static void
-vl_mpeg12_clear_render_target(struct pipe_video_context *vpipe,
-                       struct pipe_surface *dst,
-                       unsigned dstx, unsigned dsty,
-                       const float *rgba,
-                       unsigned width, unsigned height)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(dst);
-
-   if (ctx->pipe->clear_render_target)
-      ctx->pipe->clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
-   else
-      util_clear_render_target(ctx->pipe, dst, rgba, dstx, dsty, width, height);
-}
-
 #if 0
 static void
 vl_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
@@ -401,7 +395,6 @@ vl_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
                                 dstx, dsty, dstz,
                                 src, 0, &box);
 }
-#endif
 
 static struct pipe_transfer*
 vl_mpeg12_get_transfer(struct pipe_video_context *vpipe,
@@ -469,26 +462,77 @@ vl_mpeg12_transfer_unmap(struct pipe_video_context *vpipe,
    ctx->pipe->transfer_unmap(ctx->pipe, transfer);
 }
 
+#endif
+
 static void
-vl_mpeg12_transfer_inline_write(struct pipe_video_context *vpipe,
-                                struct pipe_resource *resource,
-                                unsigned level,
-                                unsigned usage, /* a combination of PIPE_TRANSFER_x */
-                                const struct pipe_box *box,
-                                const void *data,
-                                unsigned stride,
-                                unsigned slice_stride)
+vl_mpeg12_clear_sampler(struct pipe_video_context *vpipe,
+                        struct pipe_sampler_view *dst,
+                        const struct pipe_box *dst_box,
+                        const float *rgba)
 {
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+   struct pipe_transfer *transfer;
+   union util_color uc;
+   void *map;
+   unsigned i;
 
    assert(vpipe);
-   assert(resource);
-   assert(box);
-   assert(data);
-   assert(ctx->pipe->transfer_inline_write);
+   assert(dst);
+   assert(dst_box);
+   assert(rgba);
 
-   ctx->pipe->transfer_inline_write(ctx->pipe, resource, level, usage,
-                                    box, data, stride, slice_stride);
+   transfer = ctx->pipe->get_transfer(ctx->pipe, dst->texture, 0, PIPE_TRANSFER_WRITE, dst_box);
+   if (!transfer)
+      return;
+
+   map = ctx->pipe->transfer_map(ctx->pipe, transfer);
+   if (!map) /* check the mapping itself; transfer was already validated above */
+      goto error_map;
+
+   for ( i = 0; i < 4; ++i)
+      uc.f[i] = rgba[i];
+
+   util_fill_rect(map, dst->texture->format, transfer->stride, 0, 0,
+                  dst_box->width, dst_box->height, &uc);
+
+   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
+
+error_map:
+   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
+}
+
+static void
+vl_mpeg12_upload_sampler(struct pipe_video_context *vpipe,
+                         struct pipe_sampler_view *dst,
+                         const struct pipe_box *dst_box,
+                         const void *src, unsigned src_stride,
+                         unsigned src_x, unsigned src_y)
+{
+   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
+   struct pipe_transfer *transfer;
+   void *map;
+
+   assert(vpipe);
+   assert(dst);
+   assert(dst_box);
+   assert(src);
+
+   transfer = ctx->pipe->get_transfer(ctx->pipe, dst->texture, 0, PIPE_TRANSFER_WRITE, dst_box);
+   if (!transfer)
+      return;
+
+   map = ctx->pipe->transfer_map(ctx->pipe, transfer);
+   if (!map) /* check the mapping itself; transfer was already validated above */
+      goto error_map;
+
+   util_copy_rect(map, dst->texture->format, transfer->stride, 0, 0,
+                  dst_box->width, dst_box->height,
+                  src, src_stride, src_x, src_y);
+
+   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
+
+error_map:
+   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
 }
 
 static void
@@ -509,11 +553,12 @@ vl_mpeg12_render_picture(struct pipe_video_context     *vpipe,
    assert(dst_surface);
    assert(dst_area);
 
-   vl_compositor_render(&ctx->compositor, buf->surface,
+   vl_compositor_render(&ctx->compositor, buf->sampler_view,
                         picture_type, src_area,
                         dst_surface, dst_area, fence);
 }
 
+#if 0
 static void
 vl_mpeg12_set_picture_background(struct pipe_video_context *vpipe,
                                   struct pipe_surface *bg,
@@ -527,10 +572,11 @@ vl_mpeg12_set_picture_background(struct pipe_video_context *vpipe,
 
    vl_compositor_set_background(&ctx->compositor, bg, bg_src_rect);
 }
+#endif
 
 static void
 vl_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
-                             struct pipe_surface *layers[],
+                             struct pipe_sampler_view *layers[],
                              struct pipe_video_rect *src_rects[],
                              struct pipe_video_rect *dst_rects[],
                              unsigned num_layers)
@@ -709,18 +755,18 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    ctx->base.get_param = vl_mpeg12_get_param;
    ctx->base.is_format_supported = vl_mpeg12_is_format_supported;
    ctx->base.create_surface = vl_mpeg12_create_surface;
+   ctx->base.create_sampler_view = vl_mpeg12_create_sampler_view;
    ctx->base.create_buffer = vl_mpeg12_create_buffer;
    ctx->base.render_picture = vl_mpeg12_render_picture;
-   ctx->base.clear_render_target = vl_mpeg12_clear_render_target;
+   ctx->base.clear_sampler = vl_mpeg12_clear_sampler;
    //ctx->base.resource_copy_region = vl_mpeg12_resource_copy_region;
-   ctx->base.get_transfer = vl_mpeg12_get_transfer;
-   ctx->base.transfer_destroy = vl_mpeg12_transfer_destroy;
-   ctx->base.transfer_map = vl_mpeg12_transfer_map;
-   ctx->base.transfer_flush_region = vl_mpeg12_transfer_flush_region;
-   ctx->base.transfer_unmap = vl_mpeg12_transfer_unmap;
-   if (pipe->transfer_inline_write)
-      ctx->base.transfer_inline_write = vl_mpeg12_transfer_inline_write;
-   ctx->base.set_picture_background = vl_mpeg12_set_picture_background;
+   //ctx->base.get_transfer = vl_mpeg12_get_transfer;
+   //ctx->base.transfer_destroy = vl_mpeg12_transfer_destroy;
+   //ctx->base.transfer_map = vl_mpeg12_transfer_map;
+   //ctx->base.transfer_flush_region = vl_mpeg12_transfer_flush_region;
+   //ctx->base.transfer_unmap = vl_mpeg12_transfer_unmap;
+   ctx->base.upload_sampler = vl_mpeg12_upload_sampler;
+   //ctx->base.set_picture_background = vl_mpeg12_set_picture_background;
    ctx->base.set_picture_layers = vl_mpeg12_set_picture_layers;
    ctx->base.set_csc_matrix = vl_mpeg12_set_csc_matrix;
 
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index aa903ddc125..be40c36366a 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -72,14 +72,27 @@ struct pipe_video_context
                                   enum pipe_format format,
                                   unsigned usage);
 
+   /**
+    * destroy context, all buffers must be freed before calling this
+    */
    void (*destroy)(struct pipe_video_context *vpipe);
 
+   /**
+    * create a surface of a texture
+    */
    struct pipe_surface *(*create_surface)(struct pipe_video_context *vpipe,
                                           struct pipe_resource *resource,
-                                          const struct pipe_surface *templat);
+                                          const struct pipe_surface *templ);
 
    /**
-    * Creates a buffer for as decoding target
+    * create a sampler view of a texture, for subpictures for example
+    */
+   struct pipe_sampler_view *(*create_sampler_view)(struct pipe_video_context *vpipe,
+                                                    struct pipe_resource *resource,
+                                                    const struct pipe_sampler_view *templ);
+
+   /**
+    * Creates a buffer as decoding target
     */
    struct pipe_video_buffer *(*create_buffer)(struct pipe_video_context *vpipe);
 
@@ -102,12 +115,6 @@ struct pipe_video_context
                           struct pipe_video_rect        *dst_area,
                           struct pipe_fence_handle      **fence);
 
-   void (*clear_render_target)(struct pipe_video_context *vpipe,
-                               struct pipe_surface *dst,
-                               unsigned dstx, unsigned dsty,
-                               const float *rgba,
-                               unsigned width, unsigned height);
-
 #if 0
    void (*resource_copy_region)(struct pipe_video_context *vpipe,
                                 struct pipe_resource *dst,
@@ -115,7 +122,6 @@ struct pipe_video_context
                                 struct pipe_resource *src,
                                 unsigned srcx, unsigned srcy, unsigned srcz,
                                 unsigned width, unsigned height);
-#endif
 
    struct pipe_transfer *(*get_transfer)(struct pipe_video_context *vpipe,
                                          struct pipe_resource *resource,
@@ -135,15 +141,18 @@ struct pipe_video_context
 
    void (*transfer_unmap)(struct pipe_video_context *vpipe,
                           struct pipe_transfer *transfer);
+#endif
 
-   void (*transfer_inline_write)(struct pipe_video_context *vpipe,
-                                 struct pipe_resource *resource,
-                                 unsigned level,
-                                 unsigned usage, /* a combination of PIPE_TRANSFER_x */
-                                 const struct pipe_box *box,
-                                 const void *data,
-                                 unsigned stride,
-                                 unsigned slice_stride);
+   void (*upload_sampler)(struct pipe_video_context *vpipe,
+                          struct pipe_sampler_view *dst,
+                          const struct pipe_box *dst_box,
+                          const void *src, unsigned src_stride,
+                          unsigned src_x, unsigned src_y);
+
+   void (*clear_sampler)(struct pipe_video_context *vpipe,
+                         struct pipe_sampler_view *dst,
+                         const struct pipe_box *dst_box,
+                         const float *rgba);
 
    /*@}*/
 
@@ -151,18 +160,22 @@ struct pipe_video_context
     * Parameter-like states (or properties)
     */
    /*@{*/
+#if 0
    void (*set_picture_background)(struct pipe_video_context *vpipe,
                                   struct pipe_surface *bg,
                                   struct pipe_video_rect *bg_src_rect);
+#endif
 
    void (*set_picture_layers)(struct pipe_video_context *vpipe,
-                              struct pipe_surface *layers[],
+                              struct pipe_sampler_view *layers[],
                               struct pipe_video_rect *src_rects[],
                               struct pipe_video_rect *dst_rects[],
                               unsigned num_layers);
 
+#if 0
    void (*set_picture_desc)(struct pipe_video_context *vpipe,
                             const struct pipe_picture_desc *desc);
+#endif
 
    void (*set_csc_matrix)(struct pipe_video_context *vpipe, const float *mat);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 548be8bed60..7123e3b0634 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -35,6 +35,7 @@
 #include <pipe/p_state.h>
 #include <util/u_memory.h>
 #include <util/u_math.h>
+#include <util/u_format.h>
 #include "xvmc_private.h"
 
 #define FOURCC_RGB 0x0000003
@@ -139,9 +140,8 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    XvMCContextPrivate *context_priv;
    XvMCSubpicturePrivate *subpicture_priv;
    struct pipe_video_context *vpipe;
-   struct pipe_resource template;
-   struct pipe_resource *tex;
-   struct pipe_surface surf_template;
+   struct pipe_resource tex_templ, *tex;
+   struct pipe_sampler_view sampler_templ;
    Status ret;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Creating subpicture %p.\n", subpicture);
@@ -169,44 +169,42 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    if (!subpicture_priv)
       return BadAlloc;
 
-   memset(&template, 0, sizeof(struct pipe_resource));
-   template.target = PIPE_TEXTURE_2D;
-   template.format = XvIDToPipe(xvimage_id);
-   template.last_level = 0;
+   memset(&tex_templ, 0, sizeof(tex_templ));
+   tex_templ.target = PIPE_TEXTURE_2D;
+   tex_templ.format = XvIDToPipe(xvimage_id);
+   tex_templ.last_level = 0;
    if (vpipe->get_param(vpipe, PIPE_CAP_NPOT_TEXTURES)) {
-      template.width0 = width;
-      template.height0 = height;
+      tex_templ.width0 = width;
+      tex_templ.height0 = height;
    }
    else {
-      template.width0 = util_next_power_of_two(width);
-      template.height0 = util_next_power_of_two(height);
+      tex_templ.width0 = util_next_power_of_two(width);
+      tex_templ.height0 = util_next_power_of_two(height);
    }
-   template.depth0 = 1;
-   template.array_size = 1;
-   template.usage = PIPE_USAGE_DYNAMIC;
-   template.bind = PIPE_BIND_SAMPLER_VIEW;
-   template.flags = 0;
+   tex_templ.depth0 = 1;
+   tex_templ.array_size = 1;
+   tex_templ.usage = PIPE_USAGE_DYNAMIC;
+   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
+   tex_templ.flags = 0;
 
-   subpicture_priv->context = context;
-   tex = vpipe->screen->resource_create(vpipe->screen, &template);
+   tex = vpipe->screen->resource_create(vpipe->screen, &tex_templ);
 
-   memset(&surf_template, 0, sizeof(surf_template));
-   surf_template.format = tex->format;
-   surf_template.usage = PIPE_BIND_SAMPLER_VIEW;
-   subpicture_priv->sfc = vpipe->create_surface(vpipe, tex, &surf_template);
+   memset(&sampler_templ, 0, sizeof(sampler_templ));
+   subpicture_priv->sampler = vpipe->create_sampler_view(vpipe, tex, &sampler_templ);
    pipe_resource_reference(&tex, NULL);
-   if (!subpicture_priv->sfc) {
+   if (!subpicture_priv->sampler) {
       FREE(subpicture_priv);
       return BadAlloc;
    }
 
+   subpicture_priv->context = context;
    subpicture->subpicture_id = XAllocID(dpy);
    subpicture->context_id = context->context_id;
    subpicture->xvimage_id = xvimage_id;
    subpicture->width = width;
    subpicture->height = height;
    subpicture->num_palette_entries = 0;
-   subpicture->entry_bytes = PipeToComponentOrder(template.format, subpicture->component_order);
+   subpicture->entry_bytes = PipeToComponentOrder(tex_templ.format, subpicture->component_order);
    subpicture->privData = subpicture_priv;
 
    SyncHandle();
@@ -222,7 +220,6 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh
 {
    XvMCSubpicturePrivate *subpicture_priv;
    XvMCContextPrivate *context_priv;
-   unsigned int tmp_color;
    float color_f[4];
 
    assert(dpy);
@@ -238,9 +235,9 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh
    subpicture_priv = subpicture->privData;
    context_priv = subpicture_priv->context->privData;
    /* TODO: Assert clear rect is within bounds? Or clip? */
-   context_priv->vctx->vpipe->clear_render_target(context_priv->vctx->vpipe,
-                                                  subpicture_priv->sfc, x, y,
-                                                  color_f, width, height);
+   //context_priv->vctx->vpipe->clear_render_target(context_priv->vctx->vpipe,
+   //                                               subpicture_priv->sampler, x, y,
+   //                                               color_f, width, height);
 
    return Success;
 }
@@ -253,7 +250,7 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
    XvMCSubpicturePrivate *subpicture_priv;
    XvMCContextPrivate *context_priv;
    struct pipe_video_context *vpipe;
-   struct pipe_transfer *xfer;
+
    unsigned char *src, *dst, *dst_line;
    unsigned x, y;
    struct pipe_box dst_box = {dstx, dsty, 0, width, height, 1};
@@ -279,19 +276,10 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
    vpipe = context_priv->vctx->vpipe;
 
    /* TODO: Assert rects are within bounds? Or clip? */
+   vpipe->upload_sampler(vpipe, subpicture_priv->sampler, &dst_box,
+                         image->data, width*3, srcx, srcy);
 
-   xfer = vpipe->get_transfer(vpipe, subpicture_priv->sfc->texture,
-                              0, PIPE_TRANSFER_WRITE, &dst_box);
-   if (!xfer)
-      return BadAlloc;
-
-   src = image->data;
-   dst = vpipe->transfer_map(vpipe, xfer);
-   if (!dst) {
-      vpipe->transfer_destroy(vpipe, xfer);
-      return BadAlloc;
-   }
-
+#if 0
    switch (image->id) {
       case FOURCC_RGB:
          assert(subpicture_priv->sfc->format == XvIDToPipe(image->id));
@@ -308,9 +296,7 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
       default:
          XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", image->id);
    }
-
-   vpipe->transfer_unmap(vpipe, xfer);
-   vpipe->transfer_destroy(vpipe, xfer);
+#endif
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p composited.\n", subpicture);
 
@@ -330,7 +316,7 @@ Status XvMCDestroySubpicture(Display *dpy, XvMCSubpicture *subpicture)
       return XvMCBadSubpicture;
 
    subpicture_priv = subpicture->privData;
-   pipe_surface_reference(&subpicture_priv->sfc, NULL);
+   pipe_sampler_view_reference(&subpicture_priv->sampler, NULL);
    FREE(subpicture_priv);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p destroyed.\n", subpicture);
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 03301238883..6fb19124867 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -449,7 +449,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
       XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p has subpicture %p.\n", surface, surface_priv->subpicture);
 
       assert(subpicture_priv->surface == surface);
-      vpipe->set_picture_layers(vpipe, &subpicture_priv->sfc, src_rects, dst_rects, 1);
+      vpipe->set_picture_layers(vpipe, &subpicture_priv->sampler, src_rects, dst_rects, 1);
 
       surface_priv->subpicture = NULL;
       subpicture_priv->surface = NULL;
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 26be1f7b846..68dfb0d355f 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -36,7 +36,7 @@
 #define BLOCK_SIZE_BYTES (BLOCK_SIZE_SAMPLES * 2)
 
 struct vl_context;
-struct pipe_surface;
+struct pipe_sampler_view;
 struct pipe_fence_handle;
 
 typedef struct
@@ -72,7 +72,7 @@ typedef struct
 
 typedef struct
 {
-   struct pipe_surface *sfc;
+   struct pipe_sampler_view *sampler;
 
    /* The surface this subpicture is currently associated with, if any. */
    XvMCSurface *surface;
-- 
cgit v1.2.3


From 3d40d4f391e2fc319a03d8f171a2cfb9daf250c8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 26 Mar 2011 00:20:16 +0100
Subject: [g3dvl] throw out all unused parts of the interface

---
 src/gallium/auxiliary/vl/vl_compositor.c          |  14 +--
 src/gallium/auxiliary/vl/vl_compositor.h          |   8 --
 src/gallium/auxiliary/vl/vl_mpeg12_context.c      | 122 ----------------------
 src/gallium/include/pipe/p_video_context.h        |  92 ++++++++--------
 src/gallium/state_trackers/xorg/xvmc/context.c    |  12 +--
 src/gallium/state_trackers/xorg/xvmc/subpicture.c |  51 ++++-----
 6 files changed, 75 insertions(+), 224 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index cef8b6992ab..278f86bc54a 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -345,8 +345,6 @@ bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *p
 
    compositor->fb_state.width = 0;
    compositor->fb_state.height = 0;
-   compositor->bg = NULL;
-   compositor->dirty_bg = false;
    for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
       compositor->layers[i] = NULL;
    compositor->dirty_layers = 0;
@@ -484,16 +482,6 @@ static unsigned gen_data(struct vl_compositor *c,
    if (!vb)
       return 0;
 
-   if (c->dirty_bg) {
-      struct vertex2f bg_inv_size = {1.0f / c->bg->texture->width0, 1.0f / c->bg->texture->height0};
-      gen_rect_verts(num_rects, &c->bg_src_rect, &bg_inv_size, NULL, NULL, vb);
-      textures[num_rects] = c->bg;
-      /* XXX: Hack */
-      frag_shaders[num_rects] = c->fragment_shader.rgb_2_rgb;
-      ++num_rects;
-      c->dirty_bg = false;
-   }
-
    {
       struct vertex2f src_inv_size = { 1.0f / src_surface->texture->width0, 1.0f / src_surface->texture->height0};
       gen_rect_verts(num_rects, src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
@@ -613,7 +601,7 @@ void vl_compositor_render(struct vl_compositor          *compositor,
 
    draw_layers(compositor, src_surface, src_area, dst_area);
 
-   assert(!compositor->dirty_bg && !compositor->dirty_layers);
+   assert(!compositor->dirty_layers);
    compositor->pipe->flush(compositor->pipe, fence);
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index c2de98de0ef..8bea7ab39cf 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -57,9 +57,6 @@ struct vl_compositor
    void *vertex_elems_state;
    struct pipe_resource *fs_const_buf;
 
-   struct pipe_sampler_view *bg;
-   struct pipe_video_rect bg_src_rect;
-   bool dirty_bg;
    struct pipe_sampler_view *layers[VL_COMPOSITOR_MAX_LAYERS];
    struct pipe_video_rect layer_src_rects[VL_COMPOSITOR_MAX_LAYERS];
    struct pipe_video_rect layer_dst_rects[VL_COMPOSITOR_MAX_LAYERS];
@@ -72,11 +69,6 @@ bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *p
 
 void vl_compositor_cleanup(struct vl_compositor *compositor);
 
-#if 0
-void vl_compositor_set_background(struct vl_compositor *compositor,
-                                  struct pipe_surface *bg, struct pipe_video_rect *bg_src_rect);
-#endif
-
 void vl_compositor_set_layers(struct vl_compositor *compositor,
                               struct pipe_sampler_view *layers[],
                               struct pipe_video_rect *src_rects[],
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 185380ed216..af1079e480b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -365,105 +365,6 @@ vl_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
                                                  0, usage);
 }
 
-#if 0
-static void
-vl_mpeg12_resource_copy_region(struct pipe_video_context *vpipe,
-                               struct pipe_resource *dst,
-                               unsigned dstx, unsigned dsty, unsigned dstz,
-                               struct pipe_resource *src,
-                               unsigned srcx, unsigned srcy, unsigned srcz,
-                               unsigned width, unsigned height)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(dst);
-
-   struct pipe_box box;
-   box.x = srcx;
-   box.y = srcy;
-   box.z = srcz;
-   box.width = width;
-   box.height = height;
-
-   if (ctx->pipe->resource_copy_region)
-      ctx->pipe->resource_copy_region(ctx->pipe, dst, 0,
-                                      dstx, dsty, dstz,
-                                      src, 0, &box);
-   else
-      util_resource_copy_region(ctx->pipe, dst, 0,
-                                dstx, dsty, dstz,
-                                src, 0, &box);
-}
-
-static struct pipe_transfer*
-vl_mpeg12_get_transfer(struct pipe_video_context *vpipe,
-                       struct pipe_resource *resource,
-                       unsigned level,
-                       unsigned usage,  /* a combination of PIPE_TRANSFER_x */
-                       const struct pipe_box *box)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(resource);
-   assert(box);
-
-   return ctx->pipe->get_transfer(ctx->pipe, resource, level, usage, box);
-}
-
-static void
-vl_mpeg12_transfer_destroy(struct pipe_video_context *vpipe,
-                           struct pipe_transfer *transfer)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(transfer);
-
-   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
-}
-
-static void*
-vl_mpeg12_transfer_map(struct pipe_video_context *vpipe,
-                       struct pipe_transfer *transfer)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(transfer);
-
-   return ctx->pipe->transfer_map(ctx->pipe, transfer);
-}
-
-static void
-vl_mpeg12_transfer_flush_region(struct pipe_video_context *vpipe,
-                                struct pipe_transfer *transfer,
-                                const struct pipe_box *box)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(transfer);
-   assert(box);
-
-   ctx->pipe->transfer_flush_region(ctx->pipe, transfer, box);
-}
-
-static void
-vl_mpeg12_transfer_unmap(struct pipe_video_context *vpipe,
-                         struct pipe_transfer *transfer)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(transfer);
-
-   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
-}
-
-#endif
-
 static void
 vl_mpeg12_clear_sampler(struct pipe_video_context *vpipe,
                         struct pipe_sampler_view *dst,
@@ -558,22 +459,6 @@ vl_mpeg12_render_picture(struct pipe_video_context     *vpipe,
                         dst_surface, dst_area, fence);
 }
 
-#if 0
-static void
-vl_mpeg12_set_picture_background(struct pipe_video_context *vpipe,
-                                  struct pipe_surface *bg,
-                                  struct pipe_video_rect *bg_src_rect)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert(bg);
-   assert(bg_src_rect);
-
-   vl_compositor_set_background(&ctx->compositor, bg, bg_src_rect);
-}
-#endif
-
 static void
 vl_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
                              struct pipe_sampler_view *layers[],
@@ -759,14 +644,7 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    ctx->base.create_buffer = vl_mpeg12_create_buffer;
    ctx->base.render_picture = vl_mpeg12_render_picture;
    ctx->base.clear_sampler = vl_mpeg12_clear_sampler;
-   //ctx->base.resource_copy_region = vl_mpeg12_resource_copy_region;
-   //ctx->base.get_transfer = vl_mpeg12_get_transfer;
-   //ctx->base.transfer_destroy = vl_mpeg12_transfer_destroy;
-   //ctx->base.transfer_map = vl_mpeg12_transfer_map;
-   //ctx->base.transfer_flush_region = vl_mpeg12_transfer_flush_region;
-   //ctx->base.transfer_unmap = vl_mpeg12_transfer_unmap;
    ctx->base.upload_sampler = vl_mpeg12_upload_sampler;
-   //ctx->base.set_picture_background = vl_mpeg12_set_picture_background;
    ctx->base.set_picture_layers = vl_mpeg12_set_picture_layers;
    ctx->base.set_csc_matrix = vl_mpeg12_set_csc_matrix;
 
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index be40c36366a..49b1038eea7 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -84,6 +84,11 @@ struct pipe_video_context
                                           struct pipe_resource *resource,
                                           const struct pipe_surface *templ);
 
+   /**
+    * sampler view handling, used for subpictures for example
+    */
+   /*@{*/
+
    /**
     * create a sampler view of a texture, for subpictures for example
     */
@@ -91,6 +96,22 @@ struct pipe_video_context
                                                     struct pipe_resource *resource,
                                                     const struct pipe_sampler_view *templ);
 
+   /**
+    * upload image data to a sampler
+    */
+   void (*upload_sampler)(struct pipe_video_context *vpipe,
+                          struct pipe_sampler_view *dst,
+                          const struct pipe_box *dst_box,
+                          const void *src, unsigned src_stride,
+                          unsigned src_x, unsigned src_y);
+
+   /**
+    * clear a sampler with a specific rgba color
+    */
+   void (*clear_sampler)(struct pipe_video_context *vpipe,
+                         struct pipe_sampler_view *dst,
+                         const struct pipe_box *dst_box,
+                         const float *rgba);
    /**
     * Creates a buffer as decoding target
     */
@@ -101,12 +122,14 @@ struct pipe_video_context
     */
 
 #if 0
-   /*@{*/
    void (*decode_bitstream)(struct pipe_video_context *vpipe,
                             unsigned num_bufs,
                             struct pipe_buffer **bitstream_buf);
 #endif
 
+   /**
+    * render a video buffer to the frontbuffer
+    */
    void (*render_picture)(struct pipe_video_context     *vpipe,
                           struct pipe_video_buffer      *src_surface,
                           struct pipe_video_rect        *src_area,
@@ -115,68 +138,22 @@ struct pipe_video_context
                           struct pipe_video_rect        *dst_area,
                           struct pipe_fence_handle      **fence);
 
-#if 0
-   void (*resource_copy_region)(struct pipe_video_context *vpipe,
-                                struct pipe_resource *dst,
-                                unsigned dstx, unsigned dsty, unsigned dstz,
-                                struct pipe_resource *src,
-                                unsigned srcx, unsigned srcy, unsigned srcz,
-                                unsigned width, unsigned height);
-
-   struct pipe_transfer *(*get_transfer)(struct pipe_video_context *vpipe,
-                                         struct pipe_resource *resource,
-                                         unsigned level,
-                                         unsigned usage,  /* a combination of PIPE_TRANSFER_x */
-                                         const struct pipe_box *box);
-
-   void (*transfer_destroy)(struct pipe_video_context *vpipe,
-                            struct pipe_transfer *transfer);
-
-   void* (*transfer_map)(struct pipe_video_context *vpipe,
-                         struct pipe_transfer *transfer);
-
-   void (*transfer_flush_region)(struct pipe_video_context *vpipe,
-                                 struct pipe_transfer *transfer,
-                                 const struct pipe_box *box);
-
-   void (*transfer_unmap)(struct pipe_video_context *vpipe,
-                          struct pipe_transfer *transfer);
-#endif
-
-   void (*upload_sampler)(struct pipe_video_context *vpipe,
-                          struct pipe_sampler_view *dst,
-                          const struct pipe_box *dst_box,
-                          const void *src, unsigned src_stride,
-                          unsigned src_x, unsigned src_y);
-
-   void (*clear_sampler)(struct pipe_video_context *vpipe,
-                         struct pipe_sampler_view *dst,
-                         const struct pipe_box *dst_box,
-                         const float *rgba);
-
    /*@}*/
 
    /**
     * Parameter-like states (or properties)
     */
    /*@{*/
-#if 0
-   void (*set_picture_background)(struct pipe_video_context *vpipe,
-                                  struct pipe_surface *bg,
-                                  struct pipe_video_rect *bg_src_rect);
-#endif
 
+   /**
+    * set overlay samplers
+    */
    void (*set_picture_layers)(struct pipe_video_context *vpipe,
                               struct pipe_sampler_view *layers[],
                               struct pipe_video_rect *src_rects[],
                               struct pipe_video_rect *dst_rects[],
                               unsigned num_layers);
 
-#if 0
-   void (*set_picture_desc)(struct pipe_video_context *vpipe,
-                            const struct pipe_picture_desc *desc);
-#endif
-
    void (*set_csc_matrix)(struct pipe_video_context *vpipe, const float *mat);
 
    /* TODO: Interface for scaling modes, post-processing, etc. */
@@ -187,16 +164,31 @@ struct pipe_video_buffer
 {
    struct pipe_video_context* context;
 
+   /**
+    * destroy this video buffer
+    */
    void (*destroy)(struct pipe_video_buffer *buffer);
 
+   /**
+    * map the buffer into memory before calling add_macroblocks
+    */
    void (*map)(struct pipe_video_buffer *buffer);
 
+   /**
+    * add macroblocks to buffer for decoding
+    */
    void (*add_macroblocks)(struct pipe_video_buffer *buffer,
                            unsigned num_macroblocks,
                            struct pipe_macroblock *macroblocks);
 
+   /**
+    * unmap buffer before flushing
+    */
    void (*unmap)(struct pipe_video_buffer *buffer);
 
+   /**
+    * flush buffer to video hardware
+    */
    void (*flush)(struct pipe_video_buffer *buffer,
                  struct pipe_video_buffer *ref_frames[2],
                  struct pipe_fence_handle **fence);
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index 06a1633288b..b1d17cc9149 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -177,12 +177,12 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
                          int width, int height, int flags, XvMCContext *context)
 {
    bool found_port;
-   int scrn;
-   int chroma_format;
-   int mc_type;
-   int surface_flags;
-   unsigned short subpic_max_w;
-   unsigned short subpic_max_h;
+   int scrn = 0;
+   int chroma_format = 0;
+   int mc_type = 0;
+   int surface_flags = 0;
+   unsigned short subpic_max_w = 0;
+   unsigned short subpic_max_h = 0;
    Status ret;
    struct vl_screen *vscreen;
    struct vl_context *vctx;
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 7123e3b0634..3d0bb7ab67e 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -36,6 +36,7 @@
 #include <util/u_memory.h>
 #include <util/u_math.h>
 #include <util/u_format.h>
+#include <util/u_sampler.h>
 #include "xvmc_private.h"
 
 #define FOURCC_RGB 0x0000003
@@ -190,6 +191,27 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    tex = vpipe->screen->resource_create(vpipe->screen, &tex_templ);
 
    memset(&sampler_templ, 0, sizeof(sampler_templ));
+   u_sampler_view_default_template(&sampler_templ, tex, tex->format);
+
+#if 0
+   switch (image->id) {
+      case FOURCC_RGB:
+         assert(subpicture_priv->sfc->format == XvIDToPipe(image->id));
+         for (y = 0; y < height; ++y) {
+            dst_line = dst;
+            for (x = 0; x < width; ++x, src += 3, dst_line += 4) {
+               dst_line[0] = src[2]; /* B */
+               dst_line[1] = src[1]; /* G */
+               dst_line[2] = src[0]; /* R */
+            }
+            dst += xfer->stride;
+         }
+         break;
+      default:
+         XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", image->id);
+   }
+#endif
+
    subpicture_priv->sampler = vpipe->create_sampler_view(vpipe, tex, &sampler_templ);
    pipe_resource_reference(&tex, NULL);
    if (!subpicture_priv->sampler) {
@@ -220,6 +242,7 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh
 {
    XvMCSubpicturePrivate *subpicture_priv;
    XvMCContextPrivate *context_priv;
+   struct pipe_box dst_box = {x, y, 0, width, height, 1};
    float color_f[4];
 
    assert(dpy);
@@ -235,9 +258,9 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh
    subpicture_priv = subpicture->privData;
    context_priv = subpicture_priv->context->privData;
    /* TODO: Assert clear rect is within bounds? Or clip? */
-   //context_priv->vctx->vpipe->clear_render_target(context_priv->vctx->vpipe,
-   //                                               subpicture_priv->sampler, x, y,
-   //                                               color_f, width, height);
+   context_priv->vctx->vpipe->clear_sampler(context_priv->vctx->vpipe,
+                                            subpicture_priv->sampler, &dst_box,
+                                            color_f);
 
    return Success;
 }
@@ -250,9 +273,6 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
    XvMCSubpicturePrivate *subpicture_priv;
    XvMCContextPrivate *context_priv;
    struct pipe_video_context *vpipe;
-
-   unsigned char *src, *dst, *dst_line;
-   unsigned x, y;
    struct pipe_box dst_box = {dstx, dsty, 0, width, height, 1};
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Compositing subpicture %p.\n", subpicture);
@@ -279,25 +299,6 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
    vpipe->upload_sampler(vpipe, subpicture_priv->sampler, &dst_box,
                          image->data, width*3, srcx, srcy);
 
-#if 0
-   switch (image->id) {
-      case FOURCC_RGB:
-         assert(subpicture_priv->sfc->format == XvIDToPipe(image->id));
-         for (y = 0; y < height; ++y) {
-            dst_line = dst;
-            for (x = 0; x < width; ++x, src += 3, dst_line += 4) {
-               dst_line[0] = src[2]; /* B */
-               dst_line[1] = src[1]; /* G */
-               dst_line[2] = src[0]; /* R */
-            }
-            dst += xfer->stride;
-         }
-         break;
-      default:
-         XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", image->id);
-   }
-#endif
-
    XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p composited.\n", subpicture);
 
    return Success;
-- 
cgit v1.2.3


From 133add9c508f9e94e04c45a6cfa3a9dd6a2518d0 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 26 Mar 2011 00:30:22 +0100
Subject: [g3dvl] the sampler textview map is no longer needed

---
 src/gallium/auxiliary/vl/vl_compositor.c | 52 +++-----------------------------
 src/gallium/auxiliary/vl/vl_compositor.h |  2 --
 2 files changed, 4 insertions(+), 50 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 278f86bc54a..efee10b9542 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -297,21 +297,6 @@ cleanup_buffers(struct vl_compositor *c)
    pipe_resource_reference(&c->fs_const_buf, NULL);
 }
 
-static void
-texview_map_delete(const struct keymap *map,
-                   const void *key, void *data,
-                   void *user)
-{
-   struct pipe_sampler_view *sv = (struct pipe_sampler_view*)data;
-
-   assert(map);
-   assert(key);
-   assert(data);
-   assert(user);
-
-   pipe_sampler_view_reference(&sv, NULL);
-}
-
 bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
 {
    unsigned i;
@@ -322,22 +307,14 @@ bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *p
 
    compositor->pipe = pipe;
 
-   compositor->texview_map = util_new_keymap(sizeof(struct pipe_surface*), -1,
-                                             texview_map_delete);
-   if (!compositor->texview_map)
+   if (!init_pipe_state(compositor))
       return false;
 
-   if (!init_pipe_state(compositor)) {
-      util_delete_keymap(compositor->texview_map, compositor->pipe);
-      return false;
-   }
    if (!init_shaders(compositor)) {
-      util_delete_keymap(compositor->texview_map, compositor->pipe);
       cleanup_pipe_state(compositor);
       return false;
    }
    if (!init_buffers(compositor)) {
-      util_delete_keymap(compositor->texview_map, compositor->pipe);
       cleanup_shaders(compositor);
       cleanup_pipe_state(compositor);
       return false;
@@ -356,7 +333,6 @@ void vl_compositor_cleanup(struct vl_compositor *compositor)
 {
    assert(compositor);
 
-   util_delete_keymap(compositor->texview_map, compositor->pipe);
    cleanup_buffers(compositor);
    cleanup_shaders(compositor);
    cleanup_pipe_state(compositor);
@@ -517,8 +493,8 @@ static void draw_layers(struct vl_compositor *c,
                         struct pipe_video_rect *dst_rect)
 {
    unsigned num_rects;
-   struct pipe_sampler_view *src_surfaces[VL_COMPOSITOR_MAX_LAYERS + 2];
-   void *frag_shaders[VL_COMPOSITOR_MAX_LAYERS + 2];
+   struct pipe_sampler_view *src_surfaces[VL_COMPOSITOR_MAX_LAYERS + 1];
+   void *frag_shaders[VL_COMPOSITOR_MAX_LAYERS + 1];
    unsigned i;
 
    assert(c);
@@ -529,30 +505,10 @@ static void draw_layers(struct vl_compositor *c,
    num_rects = gen_data(c, src_surface, src_rect, dst_rect, src_surfaces, frag_shaders);
 
    for (i = 0; i < num_rects; ++i) {
-      boolean delete_view = FALSE;
-      struct pipe_sampler_view *surface_view = (struct pipe_sampler_view*)util_keymap_lookup(c->texview_map,
-                                                                                             &src_surfaces[i]);
-      if (!surface_view) {
-         struct pipe_sampler_view templat;
-         u_sampler_view_default_template(&templat, src_surfaces[i]->texture,
-                                         src_surfaces[i]->texture->format);
-         surface_view = c->pipe->create_sampler_view(c->pipe, src_surfaces[i]->texture,
-                                                     &templat);
-         if (!surface_view)
-            return;
-
-         delete_view = !util_keymap_insert(c->texview_map, &src_surfaces[i],
-                                           surface_view, c->pipe);
-      }
-
       c->pipe->bind_fs_state(c->pipe, frag_shaders[i]);
-      c->pipe->set_fragment_sampler_views(c->pipe, 1, &surface_view);
+      c->pipe->set_fragment_sampler_views(c->pipe, 1, &src_surfaces[i]);
 
       util_draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
-
-      if (delete_view) {
-         pipe_sampler_view_reference(&surface_view, NULL);
-      }
    }
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 8bea7ab39cf..59e60ac69f3 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -61,8 +61,6 @@ struct vl_compositor
    struct pipe_video_rect layer_src_rects[VL_COMPOSITOR_MAX_LAYERS];
    struct pipe_video_rect layer_dst_rects[VL_COMPOSITOR_MAX_LAYERS];
    unsigned dirty_layers;
-
-   struct keymap *texview_map;
 };
 
 bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe);
-- 
cgit v1.2.3


From 05a2c182f1410a6c09eba70877311ceaf80c19c5 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 26 Mar 2011 01:01:18 +0100
Subject: [g3dvl] use quads instead of triangles for the compositor

---
 src/gallium/auxiliary/vl/vl_compositor.c | 73 +++++++++++---------------------
 1 file changed, 24 insertions(+), 49 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index efee10b9542..d65524b9918 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -254,7 +254,7 @@ init_buffers(struct vl_compositor *c)
       c->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
       PIPE_USAGE_STATIC,
-      sizeof(struct vertex4f) * (VL_COMPOSITOR_MAX_LAYERS + 2) * 6
+      sizeof(struct vertex4f) * (VL_COMPOSITOR_MAX_LAYERS + 1) * 4
    );
 
    vertex_elems[0].src_offset = 0;
@@ -338,23 +338,6 @@ void vl_compositor_cleanup(struct vl_compositor *compositor)
    cleanup_pipe_state(compositor);
 }
 
-#if 0
-void vl_compositor_set_background(struct vl_compositor *compositor,
-                                 struct pipe_surface *bg, struct pipe_video_rect *bg_src_rect)
-{
-   assert(compositor);
-   assert((bg && bg_src_rect) || (!bg && !bg_src_rect));
-
-   if (compositor->bg != bg ||
-       !u_video_rects_equal(&compositor->bg_src_rect, bg_src_rect)) {
-      pipe_surface_reference(&compositor->bg, bg);
-      /*if (!u_video_rects_equal(&compositor->bg_src_rect, bg_src_rect))*/
-         compositor->bg_src_rect = *bg_src_rect;
-      compositor->dirty_bg = true;
-   }
-}
-#endif
-
 void vl_compositor_set_layers(struct vl_compositor *compositor,
                               struct pipe_sampler_view *layers[],
                               struct pipe_video_rect *src_rects[],
@@ -396,41 +379,33 @@ static void gen_rect_verts(unsigned pos,
                            struct vertex2f *dst_inv_size,
                            struct vertex4f *vb)
 {
-   assert(pos < VL_COMPOSITOR_MAX_LAYERS + 2);
+   assert(pos < VL_COMPOSITOR_MAX_LAYERS + 1);
    assert(src_rect);
    assert(src_inv_size);
    assert((dst_rect && dst_inv_size) /*|| (!dst_rect && !dst_inv_size)*/);
    assert(vb);
 
-   vb[pos * 6 + 0].x = dst_rect->x * dst_inv_size->x;
-   vb[pos * 6 + 0].y = dst_rect->y * dst_inv_size->y;
-   vb[pos * 6 + 0].z = src_rect->x * src_inv_size->x;
-   vb[pos * 6 + 0].w = src_rect->y * src_inv_size->y;
-
-   vb[pos * 6 + 1].x = dst_rect->x * dst_inv_size->x;
-   vb[pos * 6 + 1].y = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
-   vb[pos * 6 + 1].z = src_rect->x * src_inv_size->x;
-   vb[pos * 6 + 1].w = (src_rect->y + src_rect->h) * src_inv_size->y;
-
-   vb[pos * 6 + 2].x = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
-   vb[pos * 6 + 2].y = dst_rect->y * dst_inv_size->y;
-   vb[pos * 6 + 2].z = (src_rect->x + src_rect->w) * src_inv_size->x;
-   vb[pos * 6 + 2].w = src_rect->y * src_inv_size->y;
-
-   vb[pos * 6 + 3].x = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
-   vb[pos * 6 + 3].y = dst_rect->y * dst_inv_size->y;
-   vb[pos * 6 + 3].z = (src_rect->x + src_rect->w) * src_inv_size->x;
-   vb[pos * 6 + 3].w = src_rect->y * src_inv_size->y;
-
-   vb[pos * 6 + 4].x = dst_rect->x * dst_inv_size->x;
-   vb[pos * 6 + 4].y = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
-   vb[pos * 6 + 4].z = src_rect->x * src_inv_size->x;
-   vb[pos * 6 + 4].w = (src_rect->y + src_rect->h) * src_inv_size->y;
-
-   vb[pos * 6 + 5].x = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
-   vb[pos * 6 + 5].y = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
-   vb[pos * 6 + 5].z = (src_rect->x + src_rect->w) * src_inv_size->x;
-   vb[pos * 6 + 5].w = (src_rect->y + src_rect->h) * src_inv_size->y;
+   vb += pos * 4;
+
+   vb[0].x = dst_rect->x * dst_inv_size->x;
+   vb[0].y = dst_rect->y * dst_inv_size->y;
+   vb[0].z = src_rect->x * src_inv_size->x;
+   vb[0].w = src_rect->y * src_inv_size->y;
+
+   vb[1].x = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
+   vb[1].y = dst_rect->y * dst_inv_size->y;
+   vb[1].z = (src_rect->x + src_rect->w) * src_inv_size->x;
+   vb[1].w = src_rect->y * src_inv_size->y;
+
+   vb[2].x = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
+   vb[2].y = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
+   vb[2].z = (src_rect->x + src_rect->w) * src_inv_size->x;
+   vb[2].w = (src_rect->y + src_rect->h) * src_inv_size->y;
+
+   vb[3].x = dst_rect->x * dst_inv_size->x;
+   vb[3].y = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
+   vb[3].z = src_rect->x * src_inv_size->x;
+   vb[3].w = (src_rect->y + src_rect->h) * src_inv_size->y;
 }
 
 static unsigned gen_data(struct vl_compositor *c,
@@ -508,7 +483,7 @@ static void draw_layers(struct vl_compositor *c,
       c->pipe->bind_fs_state(c->pipe, frag_shaders[i]);
       c->pipe->set_fragment_sampler_views(c->pipe, 1, &src_surfaces[i]);
 
-      util_draw_arrays(c->pipe, PIPE_PRIM_TRIANGLES, i * 6, 6);
+      util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, i * 4, 4);
    }
 }
 
-- 
cgit v1.2.3


From adbc9cee0ddf5a542d3e503db673af30e6d8df5b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 26 Mar 2011 01:26:52 +0100
Subject: [g3dvl] correct layer size calculation

---
 src/gallium/auxiliary/vl/vl_compositor.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index d65524b9918..673f9bdecd2 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -372,21 +372,17 @@ void vl_compositor_set_layers(struct vl_compositor *compositor,
       pipe_sampler_view_reference(&compositor->layers[i], NULL);
 }
 
-static void gen_rect_verts(unsigned pos,
-                           struct pipe_video_rect *src_rect,
+static void gen_rect_verts(struct pipe_video_rect *src_rect,
                            struct vertex2f *src_inv_size,
                            struct pipe_video_rect *dst_rect,
                            struct vertex2f *dst_inv_size,
                            struct vertex4f *vb)
 {
-   assert(pos < VL_COMPOSITOR_MAX_LAYERS + 1);
    assert(src_rect);
    assert(src_inv_size);
    assert((dst_rect && dst_inv_size) /*|| (!dst_rect && !dst_inv_size)*/);
    assert(vb);
 
-   vb += pos * 4;
-
    vb[0].x = dst_rect->x * dst_inv_size->x;
    vb[0].y = dst_rect->y * dst_inv_size->y;
    vb[0].z = src_rect->x * src_inv_size->x;
@@ -415,7 +411,7 @@ static unsigned gen_data(struct vl_compositor *c,
                          struct pipe_sampler_view **textures,
                          void **frag_shaders)
 {
-   void *vb;
+   struct vertex4f *vb;
    struct pipe_transfer *buf_transfer;
    unsigned num_rects = 0;
    unsigned i;
@@ -435,11 +431,12 @@ static unsigned gen_data(struct vl_compositor *c,
 
    {
       struct vertex2f src_inv_size = { 1.0f / src_surface->texture->width0, 1.0f / src_surface->texture->height0};
-      gen_rect_verts(num_rects, src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
+      gen_rect_verts(src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
       textures[num_rects] = src_surface;
       /* XXX: Hack, sort of */
       frag_shaders[num_rects] = c->fragment_shader.ycbcr_2_rgb;
       ++num_rects;
+      vb += 4;
    }
 
    for (i = 0; c->dirty_layers > 0; i++) {
@@ -447,12 +444,12 @@ static unsigned gen_data(struct vl_compositor *c,
 
       if (c->dirty_layers & (1 << i)) {
          struct vertex2f layer_inv_size = {1.0f / c->layers[i]->texture->width0, 1.0f / c->layers[i]->texture->height0};
-         gen_rect_verts(num_rects, &c->layer_src_rects[i], &layer_inv_size,
-                        &c->layer_dst_rects[i], &c->fb_inv_size, vb);
+         gen_rect_verts(&c->layer_src_rects[i], &layer_inv_size, &c->layer_dst_rects[i], &layer_inv_size, vb);
          textures[num_rects] = c->layers[i];
          /* XXX: Hack */
          frag_shaders[num_rects] = c->fragment_shader.rgb_2_rgb;
          ++num_rects;
+         vb += 4;
          c->dirty_layers &= ~(1 << i);
       }
    }
-- 
cgit v1.2.3


From c001c393713f36144701f3a61b6c7de7811898ee Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 26 Mar 2011 02:03:16 +0100
Subject: [g3dvl] add blend state to compositor

This seems to get at least mplayer working
---
 src/gallium/auxiliary/vl/vl_compositor.c     | 21 ++++++++++++++++++++-
 src/gallium/auxiliary/vl/vl_compositor.h     |  1 +
 src/gallium/auxiliary/vl/vl_mpeg12_context.c |  2 +-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 673f9bdecd2..b1adef99700 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -126,8 +126,9 @@ create_frag_shader_ycbcr_2_rgb(struct vl_compositor *c)
     * fragment = csc * texel
     */
    ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
-   for (i = 0; i < 4; ++i)
+   for (i = 0; i < 3; ++i)
       ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
 
    ureg_release_temporary(shader, texel);
    ureg_END(shader);
@@ -172,6 +173,7 @@ static bool
 init_pipe_state(struct vl_compositor *c)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_blend_state blend;
 
    assert(c);
 
@@ -195,6 +197,21 @@ init_pipe_state(struct vl_compositor *c)
    /*sampler.max_anisotropy = ;*/
    c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
 
+   memset(&blend, 0, sizeof blend);
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 1;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_INV_SRC_ALPHA;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   c->blend = c->pipe->create_blend_state(c->pipe, &blend);
+
    return true;
 }
 
@@ -203,6 +220,7 @@ static void cleanup_pipe_state(struct vl_compositor *c)
    assert(c);
 
    c->pipe->delete_sampler_state(c->pipe, c->sampler);
+   c->pipe->delete_blend_state(c->pipe, c->blend);
 }
 
 static bool
@@ -476,6 +494,7 @@ static void draw_layers(struct vl_compositor *c,
 
    num_rects = gen_data(c, src_surface, src_rect, dst_rect, src_surfaces, frag_shaders);
 
+   c->pipe->bind_blend_state(c->pipe, c->blend);
    for (i = 0; i < num_rects; ++i) {
       c->pipe->bind_fs_state(c->pipe, frag_shaders[i]);
       c->pipe->set_fragment_sampler_views(c->pipe, 1, &src_surfaces[i]);
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 59e60ac69f3..aa1e480ed4c 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -45,6 +45,7 @@ struct vl_compositor
    struct pipe_framebuffer_state fb_state;
    struct vertex2f fb_inv_size;
    void *sampler;
+   void *blend;
    struct pipe_sampler_view *sampler_view;
    void *vertex_shader;
    struct
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index af1079e480b..6d4a7713068 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -179,6 +179,7 @@ vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
 
    ctx->pipe->set_vertex_buffers(ctx->pipe, 2, buf->vertex_bufs.all);
    ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
+   ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
    vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num);
    vl_idct_flush(&ctx->idct_cr, &buf->idct_cr, ne_num);
    vl_idct_flush(&ctx->idct_cb, &buf->idct_cb, ne_num);
@@ -543,7 +544,6 @@ init_pipe_state(struct vl_mpeg12_context *ctx)
    blend.rt[0].colormask = PIPE_MASK_RGBA;
    blend.dither = 0;
    ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
-   ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
 
    memset(&dsa, 0, sizeof dsa);
    dsa.depth.enabled = 0;
-- 
cgit v1.2.3


From 5f23328a8ad991ef1c70f045865b6a5e13323b67 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 26 Mar 2011 10:58:38 +0100
Subject: [g3dvl] get sampler swizzle right for subpictures

This should make ai44 work correctly.
---
 src/gallium/state_trackers/xorg/xvmc/subpicture.c | 35 ++++++++++++++---------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 3d0bb7ab67e..260303081d4 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -193,24 +193,31 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    memset(&sampler_templ, 0, sizeof(sampler_templ));
    u_sampler_view_default_template(&sampler_templ, tex, tex->format);
 
-#if 0
-   switch (image->id) {
+   switch (xvimage_id) {
       case FOURCC_RGB:
-         assert(subpicture_priv->sfc->format == XvIDToPipe(image->id));
-         for (y = 0; y < height; ++y) {
-            dst_line = dst;
-            for (x = 0; x < width; ++x, src += 3, dst_line += 4) {
-               dst_line[0] = src[2]; /* B */
-               dst_line[1] = src[1]; /* G */
-               dst_line[2] = src[0]; /* R */
-            }
-            dst += xfer->stride;
-         }
+         sampler_templ.swizzle_r = PIPE_SWIZZLE_BLUE;
+         sampler_templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+         sampler_templ.swizzle_b = PIPE_SWIZZLE_RED;
+         sampler_templ.swizzle_a = PIPE_SWIZZLE_ONE;
+         break;
+
+      case FOURCC_AI44:
+         sampler_templ.swizzle_r = PIPE_SWIZZLE_ALPHA;
+         sampler_templ.swizzle_g = PIPE_SWIZZLE_ALPHA;
+         sampler_templ.swizzle_b = PIPE_SWIZZLE_ALPHA;
+         sampler_templ.swizzle_a = PIPE_SWIZZLE_RED;
          break;
+
+      case FOURCC_IA44:
+         sampler_templ.swizzle_r = PIPE_SWIZZLE_RED;
+         sampler_templ.swizzle_g = PIPE_SWIZZLE_RED;
+         sampler_templ.swizzle_b = PIPE_SWIZZLE_RED;
+         sampler_templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+         break;
+
       default:
-         XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", image->id);
+         XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", xvimage_id);
    }
-#endif
 
    subpicture_priv->sampler = vpipe->create_sampler_view(vpipe, tex, &sampler_templ);
    pipe_resource_reference(&tex, NULL);
-- 
cgit v1.2.3


From 849a0b0a821ecc59a50fe53498a6354cfd0b24a4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 26 Mar 2011 11:46:07 +0100
Subject: [g3dvl] start implementing palettes for subpictures

---
 src/gallium/state_trackers/xorg/xvmc/subpicture.c  | 68 ++++++++++++++++++++--
 .../state_trackers/xorg/xvmc/xvmc_private.h        |  3 +
 2 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 260303081d4..07643b66848 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -57,22 +57,45 @@ static enum pipe_format XvIDToPipe(int xvimage_id)
    }
 }
 
+static unsigned NumPaletteEntries4XvID(int xvimage_id)
+{
+   switch (xvimage_id) {
+      case FOURCC_RGB:
+         return 0;
+
+      case FOURCC_AI44:
+      case FOURCC_IA44:
+         return 16;
+
+      default:
+         XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", xvimage_id);
+         return 0;
+   }
+}
+
 static int PipeToComponentOrder(enum pipe_format format, char *component_order)
 {
    assert(component_order);
 
    switch (format) {
       case PIPE_FORMAT_B8G8R8X8_UNORM:
-      case PIPE_FORMAT_L4A4_UNORM:
          return 0;
+
+      case PIPE_FORMAT_L4A4_UNORM:
+         component_order[0] = PIPE_SWIZZLE_RED;
+         component_order[1] = PIPE_SWIZZLE_GREEN;
+         component_order[2] = PIPE_SWIZZLE_BLUE;
+         component_order[3] = PIPE_SWIZZLE_ALPHA;
+         return 4;
+
       default:
          XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized PIPE_FORMAT 0x%08X.\n", format);
          component_order[0] = 0;
          component_order[1] = 0;
          component_order[2] = 0;
          component_order[3] = 0;
+         return 0;
    }
-   return 0;
 }
 
 static Status Validate(Display *dpy, XvPortID port, int surface_type_id, int xvimage_id)
@@ -232,10 +255,29 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    subpicture->xvimage_id = xvimage_id;
    subpicture->width = width;
    subpicture->height = height;
-   subpicture->num_palette_entries = 0;
+   subpicture->num_palette_entries = NumPaletteEntries4XvID(xvimage_id);
    subpicture->entry_bytes = PipeToComponentOrder(tex_templ.format, subpicture->component_order);
    subpicture->privData = subpicture_priv;
 
+   if (subpicture->num_palette_entries > 0) {
+      tex_templ.target = PIPE_TEXTURE_1D;
+      tex_templ.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+      tex_templ.width0 = subpicture->num_palette_entries;
+      tex_templ.height0 = 1;
+      tex_templ.usage = PIPE_USAGE_STATIC;
+
+      tex = vpipe->screen->resource_create(vpipe->screen, &tex_templ);
+
+      memset(&sampler_templ, 0, sizeof(sampler_templ));
+      u_sampler_view_default_template(&sampler_templ, tex, tex->format);
+      subpicture_priv->palette = vpipe->create_sampler_view(vpipe, tex, &sampler_templ);
+      pipe_resource_reference(&tex, NULL);
+      if (!subpicture_priv->sampler) {
+         FREE(subpicture_priv);
+         return BadAlloc;
+      }
+   }
+
    SyncHandle();
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p created.\n", subpicture);
@@ -325,6 +367,7 @@ Status XvMCDestroySubpicture(Display *dpy, XvMCSubpicture *subpicture)
 
    subpicture_priv = subpicture->privData;
    pipe_sampler_view_reference(&subpicture_priv->sampler, NULL);
+   pipe_sampler_view_reference(&subpicture_priv->palette, NULL);
    FREE(subpicture_priv);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p destroyed.\n", subpicture);
@@ -335,15 +378,28 @@ Status XvMCDestroySubpicture(Display *dpy, XvMCSubpicture *subpicture)
 PUBLIC
 Status XvMCSetSubpicturePalette(Display *dpy, XvMCSubpicture *subpicture, unsigned char *palette)
 {
+   XvMCSubpicturePrivate *subpicture_priv;
+   XvMCContextPrivate *context_priv;
+   struct pipe_video_context *vpipe;
+   struct pipe_box dst_box = {0, 0, 0, 0, 1, 1};
+
    assert(dpy);
+   assert(palette);
 
    if (!subpicture)
       return XvMCBadSubpicture;
 
-   assert(palette);
+   subpicture_priv = subpicture->privData;
+   context_priv = subpicture_priv->context->privData;
+   vpipe = context_priv->vctx->vpipe;
 
-   /* We don't support paletted subpictures */
-   return BadMatch;
+   dst_box.width = subpicture->num_palette_entries;
+
+   vpipe->upload_sampler(vpipe, subpicture_priv->palette, &dst_box, palette, 0, 0, 0);
+
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Palette of Subpicture %p set.\n", subpicture);
+
+   return Success;
 }
 
 PUBLIC
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 68dfb0d355f..5e976cb8916 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -74,6 +74,9 @@ typedef struct
 {
    struct pipe_sampler_view *sampler;
 
+   /* optional palette for this subpicture */
+   struct pipe_sampler_view *palette;
+
    /* The surface this subpicture is currently associated with, if any. */
    XvMCSurface *surface;
 
-- 
cgit v1.2.3


From 9a59f22d114e11a84c99609013ffe00f709c998b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 26 Mar 2011 11:53:27 +0100
Subject: [g3dvl] correct subpicture stride in upload

---
 src/gallium/state_trackers/xorg/xvmc/subpicture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 07643b66848..f2bb845cb7a 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -346,7 +346,7 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
 
    /* TODO: Assert rects are within bounds? Or clip? */
    vpipe->upload_sampler(vpipe, subpicture_priv->sampler, &dst_box,
-                         image->data, width*3, srcx, srcy);
+                         image->data, image->pitches[0], srcx, srcy);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p composited.\n", subpicture);
 
-- 
cgit v1.2.3


From 7f426615ab308de508f672567094b8b21d836a9b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 26 Mar 2011 12:36:01 +0100
Subject: [g3dvl] fully implement paletted subpictures

---
 src/gallium/auxiliary/vl/vl_compositor.c          | 77 ++++++++++++++++++++---
 src/gallium/auxiliary/vl/vl_compositor.h          |  3 +
 src/gallium/auxiliary/vl/vl_mpeg12_context.c      |  3 +-
 src/gallium/include/pipe/p_video_context.h        |  1 +
 src/gallium/state_trackers/xorg/xvmc/subpicture.c |  8 +--
 src/gallium/state_trackers/xorg/xvmc/surface.c    |  4 +-
 6 files changed, 79 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index b1adef99700..b0e0b3bfa72 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -140,6 +140,43 @@ create_frag_shader_ycbcr_2_rgb(struct vl_compositor *c)
    return true;
 }
 
+static bool
+create_frag_shader_palette_2_rgb(struct vl_compositor *c)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc;
+   struct ureg_src sampler;
+   struct ureg_src palette;
+   struct ureg_dst texel;
+   struct ureg_dst fragment;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return false;
+
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 0);
+   palette = ureg_DECL_sampler(shader, 1);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+   texel = ureg_DECL_temporary(shader);
+
+   /*
+    * texel = tex(tc, sampler); fragment = tex(texel, palette); fragment.w = texel.w
+    */
+   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
+   ureg_TEX(shader, fragment, TGSI_TEXTURE_1D, ureg_src(texel), palette);
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(texel));
+
+   ureg_release_temporary(shader, texel);
+   ureg_END(shader);
+
+   c->fragment_shader.palette_2_rgb = ureg_create_shader_and_destroy(shader, c->pipe);
+   if (!c->fragment_shader.palette_2_rgb)
+      return false;
+
+   return true;
+}
+
 static bool
 create_frag_shader_rgb_2_rgb(struct vl_compositor *c)
 {
@@ -236,6 +273,10 @@ init_shaders(struct vl_compositor *c)
       debug_printf("Unable to create YCbCr-to-RGB fragment shader.\n");
       return false;
    }
+   if (!create_frag_shader_palette_2_rgb(c)) {
+      debug_printf("Unable to create Palette-to-RGB fragment shader.\n");
+      return false;
+   }
    if (!create_frag_shader_rgb_2_rgb(c)) {
       debug_printf("Unable to create RGB-to-RGB fragment shader.\n");
       return false;
@@ -250,6 +291,7 @@ static void cleanup_shaders(struct vl_compositor *c)
 
    c->pipe->delete_vs_state(c->pipe, c->vertex_shader);
    c->pipe->delete_fs_state(c->pipe, c->fragment_shader.ycbcr_2_rgb);
+   c->pipe->delete_fs_state(c->pipe, c->fragment_shader.palette_2_rgb);
    c->pipe->delete_fs_state(c->pipe, c->fragment_shader.rgb_2_rgb);
 }
 
@@ -358,6 +400,7 @@ void vl_compositor_cleanup(struct vl_compositor *compositor)
 
 void vl_compositor_set_layers(struct vl_compositor *compositor,
                               struct pipe_sampler_view *layers[],
+                              struct pipe_sampler_view *palettes[],
                               struct pipe_video_rect *src_rects[],
                               struct pipe_video_rect *dst_rects[],
                               unsigned num_layers)
@@ -373,10 +416,12 @@ void vl_compositor_set_layers(struct vl_compositor *compositor,
              (!layers[i] && !src_rects[i] && !dst_rects[i]));
 
       if (compositor->layers[i] != layers[i] ||
+          compositor->palettes[i] != palettes[i] ||
           !u_video_rects_equal(&compositor->layer_src_rects[i], src_rects[i]) ||
           !u_video_rects_equal(&compositor->layer_dst_rects[i], dst_rects[i]))
       {
          pipe_sampler_view_reference(&compositor->layers[i], layers[i]);
+         pipe_sampler_view_reference(&compositor->palettes[i], palettes[i]);
          compositor->layer_src_rects[i] = *src_rects[i];
          compositor->layer_dst_rects[i] = *dst_rects[i];
          compositor->dirty_layers |= 1 << i;
@@ -386,8 +431,10 @@ void vl_compositor_set_layers(struct vl_compositor *compositor,
          compositor->dirty_layers |= 1 << i;
    }
 
-   for (; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
+   for (; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
       pipe_sampler_view_reference(&compositor->layers[i], NULL);
+      pipe_sampler_view_reference(&compositor->palettes[i], NULL);
+   }
 }
 
 static void gen_rect_verts(struct pipe_video_rect *src_rect,
@@ -426,7 +473,7 @@ static unsigned gen_data(struct vl_compositor *c,
                          struct pipe_sampler_view *src_surface,
                          struct pipe_video_rect *src_rect,
                          struct pipe_video_rect *dst_rect,
-                         struct pipe_sampler_view **textures,
+                         struct pipe_sampler_view *textures[VL_COMPOSITOR_MAX_LAYERS + 1][2],
                          void **frag_shaders)
 {
    struct vertex4f *vb;
@@ -450,7 +497,8 @@ static unsigned gen_data(struct vl_compositor *c,
    {
       struct vertex2f src_inv_size = { 1.0f / src_surface->texture->width0, 1.0f / src_surface->texture->height0};
       gen_rect_verts(src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
-      textures[num_rects] = src_surface;
+      textures[num_rects][0] = src_surface;
+      textures[num_rects][1] = NULL;
       /* XXX: Hack, sort of */
       frag_shaders[num_rects] = c->fragment_shader.ycbcr_2_rgb;
       ++num_rects;
@@ -463,9 +511,14 @@ static unsigned gen_data(struct vl_compositor *c,
       if (c->dirty_layers & (1 << i)) {
          struct vertex2f layer_inv_size = {1.0f / c->layers[i]->texture->width0, 1.0f / c->layers[i]->texture->height0};
          gen_rect_verts(&c->layer_src_rects[i], &layer_inv_size, &c->layer_dst_rects[i], &layer_inv_size, vb);
-         textures[num_rects] = c->layers[i];
-         /* XXX: Hack */
-         frag_shaders[num_rects] = c->fragment_shader.rgb_2_rgb;
+         textures[num_rects][0] = c->layers[i];
+         textures[num_rects][1] = c->palettes[i];
+
+         if (c->palettes[i])
+            frag_shaders[num_rects] = c->fragment_shader.palette_2_rgb;
+         else
+            frag_shaders[num_rects] = c->fragment_shader.rgb_2_rgb;
+
          ++num_rects;
          vb += 4;
          c->dirty_layers &= ~(1 << i);
@@ -483,7 +536,7 @@ static void draw_layers(struct vl_compositor *c,
                         struct pipe_video_rect *dst_rect)
 {
    unsigned num_rects;
-   struct pipe_sampler_view *src_surfaces[VL_COMPOSITOR_MAX_LAYERS + 1];
+   struct pipe_sampler_view *surfaces[VL_COMPOSITOR_MAX_LAYERS + 1][2];
    void *frag_shaders[VL_COMPOSITOR_MAX_LAYERS + 1];
    unsigned i;
 
@@ -492,12 +545,12 @@ static void draw_layers(struct vl_compositor *c,
    assert(src_rect);
    assert(dst_rect);
 
-   num_rects = gen_data(c, src_surface, src_rect, dst_rect, src_surfaces, frag_shaders);
+   num_rects = gen_data(c, src_surface, src_rect, dst_rect, surfaces, frag_shaders);
 
    c->pipe->bind_blend_state(c->pipe, c->blend);
    for (i = 0; i < num_rects; ++i) {
       c->pipe->bind_fs_state(c->pipe, frag_shaders[i]);
-      c->pipe->set_fragment_sampler_views(c->pipe, 1, &src_surfaces[i]);
+      c->pipe->set_fragment_sampler_views(c->pipe, surfaces[i][1] ? 2 : 1, &surfaces[i][0]);
 
       util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, i * 4, 4);
    }
@@ -511,6 +564,8 @@ void vl_compositor_render(struct vl_compositor          *compositor,
                           struct pipe_video_rect        *dst_area,
                           struct pipe_fence_handle      **fence)
 {
+   void *samplers[2];
+
    assert(compositor);
    assert(src_surface);
    assert(src_area);
@@ -538,9 +593,11 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    compositor->viewport.translate[2] = 0;
    compositor->viewport.translate[3] = 0;
 
+   samplers[0] = samplers[1] = compositor->sampler;
+
    compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state);
    compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport);
-   compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 1, &compositor->sampler);
+   compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 2, &samplers[0]);
    compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
    compositor->pipe->set_vertex_buffers(compositor->pipe, 1, &compositor->vertex_buf);
    compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems_state);
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index aa1e480ed4c..249eb685b40 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -52,6 +52,7 @@ struct vl_compositor
    {
       void *ycbcr_2_rgb;
       void *rgb_2_rgb;
+      void *palette_2_rgb;
    } fragment_shader;
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buf;
@@ -59,6 +60,7 @@ struct vl_compositor
    struct pipe_resource *fs_const_buf;
 
    struct pipe_sampler_view *layers[VL_COMPOSITOR_MAX_LAYERS];
+   struct pipe_sampler_view *palettes[VL_COMPOSITOR_MAX_LAYERS];
    struct pipe_video_rect layer_src_rects[VL_COMPOSITOR_MAX_LAYERS];
    struct pipe_video_rect layer_dst_rects[VL_COMPOSITOR_MAX_LAYERS];
    unsigned dirty_layers;
@@ -70,6 +72,7 @@ void vl_compositor_cleanup(struct vl_compositor *compositor);
 
 void vl_compositor_set_layers(struct vl_compositor *compositor,
                               struct pipe_sampler_view *layers[],
+                              struct pipe_sampler_view *palettes[],
                               struct pipe_video_rect *src_rects[],
                               struct pipe_video_rect *dst_rects[],
                               unsigned num_layers);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 6d4a7713068..7fd3a0377c9 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -463,6 +463,7 @@ vl_mpeg12_render_picture(struct pipe_video_context     *vpipe,
 static void
 vl_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
                              struct pipe_sampler_view *layers[],
+                             struct pipe_sampler_view *palettes[],
                              struct pipe_video_rect *src_rects[],
                              struct pipe_video_rect *dst_rects[],
                              unsigned num_layers)
@@ -473,7 +474,7 @@ vl_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
    assert((layers && src_rects && dst_rects) ||
           (!layers && !src_rects && !dst_rects));
 
-   vl_compositor_set_layers(&ctx->compositor, layers, src_rects, dst_rects, num_layers);
+   vl_compositor_set_layers(&ctx->compositor, layers, palettes, src_rects, dst_rects, num_layers);
 }
 
 static void
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 49b1038eea7..09e2d2702c7 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -150,6 +150,7 @@ struct pipe_video_context
     */
    void (*set_picture_layers)(struct pipe_video_context *vpipe,
                               struct pipe_sampler_view *layers[],
+                              struct pipe_sampler_view *palettes[],
                               struct pipe_video_rect *src_rects[],
                               struct pipe_video_rect *dst_rects[],
                               unsigned num_layers);
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index f2bb845cb7a..da9e87f50dd 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -226,15 +226,15 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
 
       case FOURCC_AI44:
          sampler_templ.swizzle_r = PIPE_SWIZZLE_ALPHA;
-         sampler_templ.swizzle_g = PIPE_SWIZZLE_ALPHA;
-         sampler_templ.swizzle_b = PIPE_SWIZZLE_ALPHA;
+         sampler_templ.swizzle_g = PIPE_SWIZZLE_ZERO;
+         sampler_templ.swizzle_b = PIPE_SWIZZLE_ZERO;
          sampler_templ.swizzle_a = PIPE_SWIZZLE_RED;
          break;
 
       case FOURCC_IA44:
          sampler_templ.swizzle_r = PIPE_SWIZZLE_RED;
-         sampler_templ.swizzle_g = PIPE_SWIZZLE_RED;
-         sampler_templ.swizzle_b = PIPE_SWIZZLE_RED;
+         sampler_templ.swizzle_g = PIPE_SWIZZLE_ZERO;
+         sampler_templ.swizzle_b = PIPE_SWIZZLE_ZERO;
          sampler_templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
          break;
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 6fb19124867..b3b594125a2 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -449,13 +449,13 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
       XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p has subpicture %p.\n", surface, surface_priv->subpicture);
 
       assert(subpicture_priv->surface == surface);
-      vpipe->set_picture_layers(vpipe, &subpicture_priv->sampler, src_rects, dst_rects, 1);
+      vpipe->set_picture_layers(vpipe, &subpicture_priv->sampler, &subpicture_priv->palette, src_rects, dst_rects, 1);
 
       surface_priv->subpicture = NULL;
       subpicture_priv->surface = NULL;
    }
    else
-      vpipe->set_picture_layers(vpipe, NULL, NULL, NULL, 0);
+      vpipe->set_picture_layers(vpipe, NULL, NULL, NULL, NULL, 0);
 
    unmap_and_flush_surface(surface_priv);
    vpipe->render_picture(vpipe, surface_priv->pipe_buffer, &src_rect, PictureToPipe(flags),
-- 
cgit v1.2.3


From e8a701f40b138d31050bcf778dc7d5857102f49b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 27 Mar 2011 00:19:25 +0100
Subject: [g3dvl] remove unused backbuffer from xvmc

---
 src/gallium/state_trackers/xorg/xvmc/context.c     |  1 -
 src/gallium/state_trackers/xorg/xvmc/surface.c     | 56 ----------------------
 .../state_trackers/xorg/xvmc/xvmc_private.h        |  1 -
 3 files changed, 58 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index b1d17cc9149..bcfd085de39 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -285,7 +285,6 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
 
    context_priv = context->privData;
    vctx = context_priv->vctx;
-   pipe_surface_reference(&context_priv->backbuffer, NULL);
    vscreen = vctx->vscreen;
    vl_video_destroy(vctx);
    vl_screen_destroy(vscreen);
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index b3b594125a2..67dc57d4344 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -95,57 +95,6 @@ static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, unsigned
    return -1;
 }
 
-#if 0
-static bool
-CreateOrResizeBackBuffer(struct vl_context *vctx, unsigned int width, unsigned int height,
-                         struct pipe_surface **backbuffer)
-{
-   struct pipe_video_context *vpipe;
-   struct pipe_resource template;
-   struct pipe_resource *tex;
-
-   assert(vctx);
-
-   vpipe = vctx->vpipe;
-
-   if (*backbuffer) {
-      if ((*backbuffer)->width != width || (*backbuffer)->height != height)
-         pipe_surface_reference(backbuffer, NULL);
-      else
-         return true;
-   }
-
-   memset(&template, 0, sizeof(struct pipe_resource));
-   template.target = PIPE_TEXTURE_2D;
-   template.format = vctx->vscreen->format;
-   template.last_level = 0;
-   template.width0 = width;
-   template.height0 = height;
-   template.depth0 = 1;
-   template.array_size = 1;
-   template.usage = PIPE_USAGE_DEFAULT;
-   template.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_BLIT_SOURCE;
-   template.flags = 0;
-
-   tex = vpipe->screen->resource_create(vpipe->screen, &template);
-   if (!tex)
-      return false;
-
-   *backbuffer = vpipe->screen->get_tex_surface(vpipe->screen, tex, 0, 0, 0,
-                                                template.bind);
-   pipe_resource_reference(&tex, NULL);
-
-   if (!*backbuffer)
-      return false;
-
-   /* Clear the backbuffer in case the video doesn't cover the whole window */
-   /* FIXME: Need to clear every time a frame moves and leaves dirty rects */
-   vpipe->surface_fill(vpipe, *backbuffer, 0, 0, width, height, 0);
-
-   return true;
-}
-#endif
-
 static void
 MacroBlocksToPipe(struct pipe_screen *screen,
                   unsigned int xvmc_picture_structure,
@@ -435,11 +384,6 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;
    vpipe = context_priv->vctx->vpipe;
 
-#if 0
-   if (!CreateOrResizeBackBuffer(context_priv->vctx, width, height, &context_priv->backbuffer))
-      return BadAlloc;
-#endif
-
    if (subpicture_priv) {
       struct pipe_video_rect src_rect = {surface_priv->subx, surface_priv->suby, surface_priv->subw, surface_priv->subh};
       struct pipe_video_rect dst_rect = {surface_priv->surfx, surface_priv->surfy, surface_priv->surfw, surface_priv->surfh};
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 5e976cb8916..330c8c2cf9d 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -42,7 +42,6 @@ struct pipe_fence_handle;
 typedef struct
 {
    struct vl_context *vctx;
-   struct pipe_surface *backbuffer;
    unsigned short subpicture_max_width;
    unsigned short subpicture_max_height;
 } XvMCContextPrivate;
-- 
cgit v1.2.3


From c6182cc6d48a7c076cfbdba241e29e5f5901ba52 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 27 Mar 2011 01:04:00 +0100
Subject: [g3dvl] improve and cleanup mc error handling

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 56 ++++++++++++++++++++----
 1 file changed, 48 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 218ff5d1c14..60d61abbf2a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -418,6 +418,8 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
       sampler.border_color[3] = 0.0f;
       /*sampler.max_anisotropy = ; */
       r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
+      if (!r->samplers.all[i])
+         goto error_samplers;
    }
 
    memset(&rs_state, 0, sizeof(rs_state));
@@ -427,8 +429,16 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    rs_state.point_size = BLOCK_WIDTH;
    rs_state.gl_rasterization_rules = true;
    r->rs_state = r->pipe->create_rasterizer_state(r->pipe, &rs_state);
+   if (!r->rs_state)
+      goto error_samplers;
 
    return true;
+
+error_samplers:
+   for (i = 0; i < 5; ++i)
+      r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
+
+   return false;
 }
 
 static void
@@ -468,10 +478,12 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
       goto error_pipe_state;
 
    renderer->vs = create_vert_shader(renderer);
-   renderer->fs = create_frag_shader(renderer);
+   if (!renderer->vs)
+      goto error_vs_shaders;
 
-   if (renderer->vs == NULL || renderer->fs == NULL)
-      goto error_shaders;
+   renderer->fs = create_frag_shader(renderer);
+   if (!renderer->fs)
+      goto error_fs_shaders;
 
    /* create a dummy sampler */
    memset(&tex_templ, 0, sizeof(tex_templ));
@@ -487,13 +499,25 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    tex_templ.last_level = 0;
    tex_templ.usage = PIPE_USAGE_STATIC;
    tex_dummy = pipe->screen->resource_create(pipe->screen, &tex_templ);
+   if (!tex_dummy)
+      goto error_dummy;
 
+   memset(&sampler_view, 0, sizeof(sampler_view));
    u_sampler_view_default_template(&sampler_view, tex_dummy, tex_dummy->format);
    renderer->dummy = pipe->create_sampler_view(pipe, tex_dummy, &sampler_view);
+   pipe_resource_reference(&tex_dummy, NULL);
+   if (!renderer->dummy)
+      goto error_dummy;
 
    return true;
 
-error_shaders:
+error_dummy:
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs);
+
+error_fs_shaders:
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs);
+
+error_vs_shaders:
    cleanup_pipe_state(renderer);
 
 error_pipe_state:
@@ -522,12 +546,14 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    unsigned i;
 
    assert(renderer && buffer);
+   assert(y && cb && cr);
 
    pipe_resource_reference(&buffer->textures.individual.y, y);
    pipe_resource_reference(&buffer->textures.individual.cr, cr);
    pipe_resource_reference(&buffer->textures.individual.cb, cb);
 
    for (i = 0; i < 3; ++i) {
+      memset(&sampler_view, 0, sizeof(sampler_view));
       u_sampler_view_default_template(&sampler_view,
                                       buffer->textures.all[i],
                                       buffer->textures.all[i]->format);
@@ -537,9 +563,19 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
       sampler_view.swizzle_a = PIPE_SWIZZLE_ONE;
       buffer->sampler_views.all[i] = renderer->pipe->create_sampler_view(
          renderer->pipe, buffer->textures.all[i], &sampler_view);
+      if (!buffer->sampler_views.all[i])
+         goto error_samplers;
    }
 
    return true;
+
+error_samplers:
+   for (i = 0; i < 3; ++i) {
+      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
+      pipe_resource_reference(&buffer->textures.all[i], NULL);
+   }
+
+   return false;
 }
 
 void
@@ -549,10 +585,11 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
 
    assert(buffer);
 
-   for (i = 0; i < 3; ++i) {
+   for (i = 0; i < 5; ++i)
       pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
+
+   for (i = 0; i < 3; ++i)
       pipe_resource_reference(&buffer->textures.all[i], NULL);
-   }
 }
 
 void
@@ -563,6 +600,7 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
                             struct pipe_fence_handle **fence)
 {
    assert(renderer && buffer);
+   assert(surface && ref);
 
    if (not_empty_num_instances == 0 && empty_num_instances == 0)
       return;
@@ -573,8 +611,10 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
 
    /* if no reference frame provided use a dummy sampler instead */
-   buffer->sampler_views.individual.ref[0] = ref[0] ? ref[0] : renderer->dummy;
-   buffer->sampler_views.individual.ref[1] = ref[1] ? ref[1] : renderer->dummy;
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.ref[0],
+                               ref[0] ? ref[0] : renderer->dummy);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.ref[1],
+                               ref[1] ? ref[1] : renderer->dummy);
 
    renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, buffer->sampler_views.all);
    renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 5, renderer->samplers.all);
-- 
cgit v1.2.3


From 8330bc29dda71c41c56b3c1989334823ae8779d4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 27 Mar 2011 01:41:10 +0100
Subject: [g3dvl] cleanup and improve idct error handling

---
 src/gallium/auxiliary/vl/vl_idct.c | 129 ++++++++++++++++++++++++++++---------
 1 file changed, 99 insertions(+), 30 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 6b0010a04bb..a21e06b7776 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -362,16 +362,34 @@ static bool
 init_shaders(struct vl_idct *idct, int color_swizzle)
 {
    idct->matrix_vs = create_vert_shader(idct, true, color_swizzle);
+   if (!idct->matrix_vs)
+      goto error_matrix_vs;
+
    idct->matrix_fs = create_matrix_frag_shader(idct);
+   if (!idct->matrix_fs)
+      goto error_matrix_fs;
 
    idct->transpose_vs = create_vert_shader(idct, false, color_swizzle);
+   if (!idct->transpose_vs)
+      goto error_transpose_vs;
+
    idct->transpose_fs = create_transpose_frag_shader(idct);
+   if (!idct->transpose_fs)
+      goto error_transpose_fs;
+
+   return true;
+
+error_transpose_fs:
+   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
 
-   return
-      idct->matrix_vs != NULL &&
-      idct->matrix_fs != NULL &&
-      idct->transpose_vs != NULL &&
-      idct->transpose_fs != NULL;
+error_transpose_vs:
+   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
+
+error_matrix_fs:
+   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
+
+error_matrix_vs:
+   return false;
 }
 
 static void
@@ -392,6 +410,12 @@ init_state(struct vl_idct *idct)
 
    assert(idct);
 
+   memset(&rs_state, 0, sizeof(rs_state));
+   rs_state.gl_rasterization_rules = false;
+   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
+   if (!idct->rs_state)
+      goto error_rs_state;
+
    for (i = 0; i < 4; ++i) {
       memset(&sampler, 0, sizeof(sampler));
       sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
@@ -403,24 +427,22 @@ init_state(struct vl_idct *idct)
       sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
       sampler.compare_func = PIPE_FUNC_ALWAYS;
       sampler.normalized_coords = 1;
-      /*sampler.shadow_ambient = ; */
-      /*sampler.lod_bias = ; */
-      sampler.min_lod = 0;
-      /*sampler.max_lod = ; */
-      /*sampler.border_color[0] = ; */
-      /*sampler.max_anisotropy = ; */
       idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
+      if (!idct->samplers.all[i])
+         goto error_samplers;
    }
 
-   memset(&rs_state, 0, sizeof(rs_state));
-   /*rs_state.sprite_coord_enable */
-   rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
-   rs_state.point_quad_rasterization = true;
-   rs_state.point_size = BLOCK_WIDTH;
-   rs_state.gl_rasterization_rules = false;
-   idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
-
    return true;
+
+error_samplers:
+   for (i = 0; i < 4; ++i)
+      if (idct->samplers.all[i])
+         idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
+
+   idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
+
+error_rs_state:
+   return false;
 }
 
 static void
@@ -457,6 +479,8 @@ init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    template.array_size = 1;
    template.usage = PIPE_USAGE_STREAM;
    buffer->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+   if (!buffer->textures.individual.source)
+      goto error;
 
    template.target = PIPE_TEXTURE_3D;
    template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
@@ -465,13 +489,15 @@ init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    template.depth0 = NR_RENDER_TARGETS;
    template.usage = PIPE_USAGE_STATIC;
    buffer->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+   if (!buffer->textures.individual.intermediate)
+      goto error;
 
    for (i = 0; i < 4; ++i) {
-      if(buffer->textures.all[i] == NULL)
-         return false; /* a texture failed to allocate */
-
+      memset(&sampler_view, 0, sizeof(sampler_view));
       u_sampler_view_default_template(&sampler_view, buffer->textures.all[i], buffer->textures.all[i]->format);
       buffer->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, buffer->textures.all[i], &sampler_view);
+      if (!buffer->sampler_views.all[i])
+         goto error;
    }
 
    template.target = PIPE_TEXTURE_2D;
@@ -482,8 +508,17 @@ init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    template.depth0 = 1;
 
    buffer->destination = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
+   if (!buffer->destination)
+      goto error;
 
    return true;
+
+error:
+   for (i = 0; i < 4; ++i) {
+      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
+      pipe_resource_reference(&buffer->textures.all[i], NULL);
+   }
+   return false;
 }
 
 static void
@@ -517,6 +552,8 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
       1
    };
 
+   assert(pipe);
+
    memset(&template, 0, sizeof(struct pipe_resource));
    template.target = PIPE_TEXTURE_2D;
    template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
@@ -530,17 +567,24 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    template.flags = 0;
 
    matrix = pipe->screen->resource_create(pipe->screen, &template);
+   if (!matrix)
+      goto error_matrix;
 
-   /* matrix */
    buf_transfer = pipe->get_transfer
    (
       pipe, matrix,
       0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
    );
+   if (!buf_transfer)
+      goto error_transfer;
+
    pitch = buf_transfer->stride / sizeof(float);
 
    f = pipe->transfer_map(pipe, buf_transfer);
+   if (!f)
+      goto error_map;
+
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
          // transpose and scale
@@ -550,6 +594,15 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    pipe->transfer_destroy(pipe, buf_transfer);
 
    return matrix;
+
+error_map:
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+error_transfer:
+   pipe_resource_reference(&matrix, NULL);
+
+error_matrix:
+   return NULL;
 }
 
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
@@ -600,7 +653,7 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    pipe_resource_reference(&buffer->textures.individual.transpose, idct->matrix);
 
    if (!init_textures(idct, buffer))
-      return NULL;
+      goto error_textures;
 
    /* init state */
    buffer->viewport[0].scale[0] = buffer->textures.individual.intermediate->width0;
@@ -622,6 +675,9 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
       buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface(
          idct->pipe, buffer->textures.individual.intermediate,
          &template);
+
+      if (!buffer->fb_state[0].cbufs[i])
+         goto error_matrix_surfaces;
    }
 
    buffer->fb_state[1].width = buffer->destination->width0;
@@ -635,6 +691,9 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    buffer->fb_state[1].cbufs[0] = idct->pipe->create_surface(
       idct->pipe, buffer->destination, &template);
 
+   if (!buffer->fb_state[1].cbufs[0])
+      goto error_transpose_surface;
+
    for(i = 0; i < 2; ++i) {
       buffer->viewport[i].scale[2] = 1;
       buffer->viewport[i].scale[3] = 1;
@@ -647,6 +706,16 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    }
 
    return buffer->destination;
+
+error_transpose_surface:
+   pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL);
+
+error_matrix_surfaces:
+   for(i = 0; i < NR_RENDER_TARGETS; ++i)
+      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
+
+error_textures:
+   return NULL;
 }
 
 void
@@ -654,13 +723,12 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
    unsigned i;
 
-   assert(buffer);
+   assert(idct && buffer);
 
-   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
-      idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[0].cbufs[i]);
-   }
+   for(i = 0; i < NR_RENDER_TARGETS; ++i)
+      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
 
-   idct->pipe->surface_destroy(idct->pipe, buffer->fb_state[1].cbufs[0]);
+   pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL);
 
    cleanup_textures(idct, buffer);
 }
@@ -668,7 +736,7 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 void
 vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   assert(idct);
+   assert(idct && buffer);
 
    struct pipe_box rect =
    {
@@ -697,6 +765,7 @@ vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *
    unsigned i;
 
    assert(buffer);
+   assert(block);
 
    tex_pitch = buffer->tex_transfer->stride / sizeof(short);
    texels = buffer->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
-- 
cgit v1.2.3


From 5a351e51291922aa295926215fdecccc0baeef51 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 27 Mar 2011 01:53:04 +0100
Subject: [g3dvl] no need for individual samplers for idct stage 1 & 2

---
 src/gallium/auxiliary/vl/vl_idct.c | 19 +++++++++----------
 src/gallium/auxiliary/vl/vl_idct.h | 10 +---------
 2 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index a21e06b7776..075b892628f 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -416,7 +416,7 @@ init_state(struct vl_idct *idct)
    if (!idct->rs_state)
       goto error_rs_state;
 
-   for (i = 0; i < 4; ++i) {
+   for (i = 0; i < 2; ++i) {
       memset(&sampler, 0, sizeof(sampler));
       sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
       sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
@@ -427,17 +427,17 @@ init_state(struct vl_idct *idct)
       sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
       sampler.compare_func = PIPE_FUNC_ALWAYS;
       sampler.normalized_coords = 1;
-      idct->samplers.all[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
-      if (!idct->samplers.all[i])
+      idct->samplers[i] = idct->pipe->create_sampler_state(idct->pipe, &sampler);
+      if (!idct->samplers[i])
          goto error_samplers;
    }
 
    return true;
 
 error_samplers:
-   for (i = 0; i < 4; ++i)
-      if (idct->samplers.all[i])
-         idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
+   for (i = 0; i < 2; ++i)
+      if (idct->samplers[i])
+         idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
 
    idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
 
@@ -450,8 +450,8 @@ cleanup_state(struct vl_idct *idct)
 {
    unsigned i;
 
-   for (i = 0; i < 4; ++i)
-      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
+   for (i = 0; i < 2; ++i)
+      idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
 
    idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
 }
@@ -795,12 +795,12 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
       num_verts = idct->blocks_x * idct->blocks_y * 4;
 
       idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
+      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
 
       /* first stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]);
       idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]);
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
-      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[0]);
       idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
       util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
@@ -809,7 +809,6 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
       idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]);
       idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]);
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
-      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers.stage[1]);
       idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
       util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 913034e7ab4..14f66f858eb 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -44,15 +44,7 @@ struct vl_idct
 
    void *rs_state;
 
-   union
-   {
-      void *all[4];
-      void *stage[2][2];
-      struct {
-         void *matrix, *source;
-         void *transpose, *intermediate;
-      } individual;
-   } samplers;
+   void *samplers[2];
 
    void *matrix_vs, *transpose_vs;
    void *matrix_fs, *transpose_fs;
-- 
cgit v1.2.3


From 020328ca32a3b6548b4c064c4fe115e386752daa Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 27 Mar 2011 19:43:02 +0200
Subject: [g3dvl] introduction of ycbcr buffers

Moves most of the buffer creation out of the idct code.
---
 src/gallium/auxiliary/Makefile               |   3 +-
 src/gallium/auxiliary/vl/vl_idct.c           | 240 ++++++++++++---------------
 src/gallium/auxiliary/vl/vl_idct.h           |  22 +--
 src/gallium/auxiliary/vl/vl_mpeg12_context.c |  41 ++++-
 src/gallium/auxiliary/vl/vl_mpeg12_context.h |   5 +
 src/gallium/auxiliary/vl/vl_ycbcr_buffer.c   | 191 +++++++++++++++++++++
 src/gallium/auxiliary/vl/vl_ycbcr_buffer.h   |  89 ++++++++++
 7 files changed, 436 insertions(+), 155 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
 create mode 100644 src/gallium/auxiliary/vl/vl_ycbcr_buffer.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 869b2d486a4..0464df84e12 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -153,7 +153,8 @@ C_SOURCES = \
 	vl/vl_compositor.c \
 	vl/vl_csc.c \
         vl/vl_idct.c \
-        vl/vl_vertex_buffers.c
+        vl/vl_vertex_buffers.c \
+        vl/vl_ycbcr_buffer.c
 
 GALLIVM_SOURCES = \
         gallivm/lp_bld_arit.c \
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 075b892628f..8cfb56ea416 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -27,6 +27,7 @@
 
 #include "vl_idct.h"
 #include "vl_vertex_buffers.h"
+#include "vl_ycbcr_buffer.h"
 #include "vl_defines.h"
 #include "util/u_draw.h"
 #include <assert.h>
@@ -457,89 +458,91 @@ cleanup_state(struct vl_idct *idct)
 }
 
 static bool
-init_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   struct pipe_resource template;
-   struct pipe_sampler_view sampler_view;
+   struct pipe_resource tex_templ, *tex;
+   struct pipe_sampler_view sv_templ;
+   struct pipe_surface surf_templ;
    unsigned i;
 
    assert(idct && buffer);
 
-   /* create textures */
-   memset(&template, 0, sizeof(struct pipe_resource));
-   template.last_level = 0;
-   template.bind = PIPE_BIND_SAMPLER_VIEW;
-   template.flags = 0;
-
-   template.target = PIPE_TEXTURE_2D;
-   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
-   template.width0 = idct->buffer_width / 4;
-   template.height0 = idct->buffer_height;
-   template.depth0 = 1;
-   template.array_size = 1;
-   template.usage = PIPE_USAGE_STREAM;
-   buffer->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
-   if (!buffer->textures.individual.source)
-      goto error;
-
-   template.target = PIPE_TEXTURE_3D;
-   template.format = PIPE_FORMAT_R16G16B16A16_SNORM;
-   template.width0 = idct->buffer_width / NR_RENDER_TARGETS;
-   template.height0 = idct->buffer_height / 4;
-   template.depth0 = NR_RENDER_TARGETS;
-   template.usage = PIPE_USAGE_STATIC;
-   buffer->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
-   if (!buffer->textures.individual.intermediate)
-      goto error;
+   memset(&tex_templ, 0, sizeof(tex_templ));
+   tex_templ.target = PIPE_TEXTURE_3D;
+   tex_templ.format = PIPE_FORMAT_R16G16B16A16_SNORM;
+   tex_templ.width0 = idct->buffer_width / NR_RENDER_TARGETS;
+   tex_templ.height0 = idct->buffer_height / 4;
+   tex_templ.depth0 = NR_RENDER_TARGETS;
+   tex_templ.array_size = 1;
+   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   tex_templ.usage = PIPE_USAGE_STATIC;
+
+   tex = idct->pipe->screen->resource_create(idct->pipe->screen, &tex_templ);
+   if (!tex)
+      goto error_tex;
+
+   memset(&sv_templ, 0, sizeof(sv_templ));
+   u_sampler_view_default_template(&sv_templ, tex, tex->format);
+   buffer->sampler_views.individual.intermediate =
+      idct->pipe->create_sampler_view(idct->pipe, tex, &sv_templ);
+   if (!buffer->sampler_views.individual.intermediate)
+         goto error_sampler_view;
+
+   buffer->fb_state[0].width = tex->width0;
+   buffer->fb_state[0].height = tex->height0;
+   buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
+   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
+      memset(&surf_templ, 0, sizeof(surf_templ));
+      surf_templ.format = tex->format;
+      surf_templ.u.tex.first_layer = i;
+      surf_templ.u.tex.last_layer = i;
+      surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+      buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface(
+         idct->pipe, tex, &surf_templ);
 
-   for (i = 0; i < 4; ++i) {
-      memset(&sampler_view, 0, sizeof(sampler_view));
-      u_sampler_view_default_template(&sampler_view, buffer->textures.all[i], buffer->textures.all[i]->format);
-      buffer->sampler_views.all[i] = idct->pipe->create_sampler_view(idct->pipe, buffer->textures.all[i], &sampler_view);
-      if (!buffer->sampler_views.all[i])
-         goto error;
+      if (!buffer->fb_state[0].cbufs[i])
+         goto error_surfaces;
    }
 
-   template.target = PIPE_TEXTURE_2D;
-   /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
-   template.format = PIPE_FORMAT_R16_SNORM;
-   template.width0 = idct->buffer_width;
-   template.height0 = idct->buffer_height;
-   template.depth0 = 1;
-
-   buffer->destination = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
-   if (!buffer->destination)
-      goto error;
+   buffer->viewport[0].scale[0] = tex->width0;
+   buffer->viewport[0].scale[1] = tex->height0;
 
+   pipe_resource_reference(&tex, NULL);
    return true;
 
-error:
-   for (i = 0; i < 4; ++i) {
-      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
-      pipe_resource_reference(&buffer->textures.all[i], NULL);
-   }
+error_surfaces:
+   for(i = 0; i < NR_RENDER_TARGETS; ++i)
+      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
+
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
+
+error_sampler_view:
+   pipe_resource_reference(&tex, NULL);
+
+error_tex:
    return false;
 }
 
 static void
-cleanup_textures(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
    unsigned i;
 
    assert(idct && buffer);
 
-   for (i = 0; i < 4; ++i) {
-      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
-      pipe_resource_reference(&buffer->textures.all[i], NULL);
-   }
+   for(i = 0; i < NR_RENDER_TARGETS; ++i)
+      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
+
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
 }
 
-struct pipe_resource *
+struct pipe_sampler_view *
 vl_idct_upload_matrix(struct pipe_context *pipe)
 {
    const float scale = sqrtf(SCALE_FACTOR_16_TO_9);
 
-   struct pipe_resource template, *matrix;
+   struct pipe_resource tex_templ, *matrix;
+   struct pipe_sampler_view sv_templ, *sv;
    struct pipe_transfer *buf_transfer;
    unsigned i, j, pitch;
    float *f;
@@ -554,19 +557,19 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
 
    assert(pipe);
 
-   memset(&template, 0, sizeof(struct pipe_resource));
-   template.target = PIPE_TEXTURE_2D;
-   template.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-   template.last_level = 0;
-   template.width0 = 2;
-   template.height0 = 8;
-   template.depth0 = 1;
-   template.array_size = 1;
-   template.usage = PIPE_USAGE_IMMUTABLE;
-   template.bind = PIPE_BIND_SAMPLER_VIEW;
-   template.flags = 0;
-
-   matrix = pipe->screen->resource_create(pipe->screen, &template);
+   memset(&tex_templ, 0, sizeof(tex_templ));
+   tex_templ.target = PIPE_TEXTURE_2D;
+   tex_templ.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   tex_templ.last_level = 0;
+   tex_templ.width0 = 2;
+   tex_templ.height0 = 8;
+   tex_templ.depth0 = 1;
+   tex_templ.array_size = 1;
+   tex_templ.usage = PIPE_USAGE_IMMUTABLE;
+   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
+   tex_templ.flags = 0;
+
+   matrix = pipe->screen->resource_create(pipe->screen, &tex_templ);
    if (!matrix)
       goto error_matrix;
 
@@ -593,7 +596,14 @@ vl_idct_upload_matrix(struct pipe_context *pipe)
    pipe->transfer_unmap(pipe, buf_transfer);
    pipe->transfer_destroy(pipe, buf_transfer);
 
-   return matrix;
+   memset(&sv_templ, 0, sizeof(sv_templ));
+   u_sampler_view_default_template(&sv_templ, matrix, matrix->format);
+   sv = pipe->create_sampler_view(pipe, matrix, &sv_templ);
+   pipe_resource_reference(&matrix, NULL);
+   if (!sv)
+      goto error_map;
+
+   return sv;
 
 error_map:
    pipe->transfer_destroy(pipe, buf_transfer);
@@ -608,7 +618,7 @@ error_matrix:
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
                   unsigned blocks_x, unsigned blocks_y,
-                  int color_swizzle, struct pipe_resource *matrix)
+                  int color_swizzle, struct pipe_sampler_view *matrix)
 {
    assert(idct && pipe && matrix);
 
@@ -617,7 +627,7 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
    idct->buffer_height = buffer_height;
    idct->blocks_x = blocks_x;
    idct->blocks_y = blocks_y;
-   pipe_resource_reference(&idct->matrix, matrix);
+   pipe_sampler_view_reference(&idct->matrix, matrix);
 
    if(!init_shaders(idct, color_swizzle))
       return false;
@@ -636,63 +646,35 @@ vl_idct_cleanup(struct vl_idct *idct)
    cleanup_shaders(idct);
    cleanup_state(idct);
 
-   pipe_resource_reference(&idct->matrix, NULL);
+   pipe_sampler_view_reference(&idct->matrix, NULL);
 }
 
-struct pipe_resource *
-vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+bool
+vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
+                    struct pipe_sampler_view *source, struct pipe_surface *destination)
 {
-   struct pipe_surface template;
-
    unsigned i;
 
    assert(buffer);
    assert(idct);
+   assert(source);
+   assert(destination);
 
-   pipe_resource_reference(&buffer->textures.individual.matrix, idct->matrix);
-   pipe_resource_reference(&buffer->textures.individual.transpose, idct->matrix);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->matrix);
 
-   if (!init_textures(idct, buffer))
-      goto error_textures;
+   if (!init_intermediate(idct, buffer))
+      return false;
 
    /* init state */
-   buffer->viewport[0].scale[0] = buffer->textures.individual.intermediate->width0;
-   buffer->viewport[0].scale[1] = buffer->textures.individual.intermediate->height0;
-
-   buffer->viewport[1].scale[0] = buffer->destination->width0;
-   buffer->viewport[1].scale[1] = buffer->destination->height0;
-
-   buffer->fb_state[0].width = buffer->textures.individual.intermediate->width0;
-   buffer->fb_state[0].height = buffer->textures.individual.intermediate->height0;
-
-   buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
-   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
-      memset(&template, 0, sizeof(template));
-      template.format = buffer->textures.individual.intermediate->format;
-      template.u.tex.first_layer = i;
-      template.u.tex.last_layer = i;
-      template.usage = PIPE_BIND_RENDER_TARGET;
-      buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface(
-         idct->pipe, buffer->textures.individual.intermediate,
-         &template);
-
-      if (!buffer->fb_state[0].cbufs[i])
-         goto error_matrix_surfaces;
-   }
-
-   buffer->fb_state[1].width = buffer->destination->width0;
-   buffer->fb_state[1].height = buffer->destination->height0;
-
+   buffer->fb_state[1].width = destination->texture->width0;
+   buffer->fb_state[1].height = destination->texture->height0;
    buffer->fb_state[1].nr_cbufs = 1;
+   pipe_surface_reference(&buffer->fb_state[1].cbufs[0], destination);
 
-   memset(&template, 0, sizeof(template));
-   template.format = buffer->destination->format;
-   template.usage = PIPE_BIND_RENDER_TARGET;
-   buffer->fb_state[1].cbufs[0] = idct->pipe->create_surface(
-      idct->pipe, buffer->destination, &template);
-
-   if (!buffer->fb_state[1].cbufs[0])
-      goto error_transpose_surface;
+   buffer->viewport[1].scale[0] = destination->texture->width0;
+   buffer->viewport[1].scale[1] = destination->texture->height0;
 
    for(i = 0; i < 2; ++i) {
       buffer->viewport[i].scale[2] = 1;
@@ -705,17 +687,7 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
       buffer->fb_state[i].zsbuf = NULL;
    }
 
-   return buffer->destination;
-
-error_transpose_surface:
-   pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL);
-
-error_matrix_surfaces:
-   for(i = 0; i < NR_RENDER_TARGETS; ++i)
-      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
-
-error_textures:
-   return NULL;
+   return true;
 }
 
 void
@@ -730,25 +702,29 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL);
 
-   cleanup_textures(idct, buffer);
+   cleanup_intermediate(idct, buffer);
 }
 
 void
 vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
+   struct pipe_resource *tex;
+
    assert(idct && buffer);
 
+   tex = buffer->sampler_views.individual.source->texture;
+
    struct pipe_box rect =
    {
       0, 0, 0,
-      buffer->textures.individual.source->width0,
-      buffer->textures.individual.source->height0,
+      tex->width0,
+      tex->height0,
       1
    };
 
    buffer->tex_transfer = idct->pipe->get_transfer
    (
-      idct->pipe, buffer->textures.individual.source,
+      idct->pipe, tex,
       0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
    );
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 14f66f858eb..e0d441265ff 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -30,6 +30,7 @@
 
 #include <pipe/p_state.h>
 #include "vl_vertex_buffers.h"
+#include "vl_ycbcr_buffer.h"
 
 /* shader based inverse distinct cosinus transformation
  * expect usage of vl_vertex_buffers as a todo list
@@ -49,7 +50,7 @@ struct vl_idct
    void *matrix_vs, *transpose_vs;
    void *matrix_fs, *transpose_fs;
 
-   struct pipe_resource *matrix;
+   struct pipe_sampler_view *matrix;
 };
 
 /* a set of buffers to work with */
@@ -58,8 +59,6 @@ struct vl_idct_buffer
    struct pipe_viewport_state viewport[2];
    struct pipe_framebuffer_state fb_state[2];
 
-   struct pipe_resource *destination;
-
    union
    {
       struct pipe_sampler_view *all[4];
@@ -70,34 +69,25 @@ struct vl_idct_buffer
       } individual;
    } sampler_views;
 
-   union
-   {
-      struct pipe_resource *all[4];
-      struct pipe_resource *stage[2][2];
-      struct {
-         struct pipe_resource *matrix, *source;
-         struct pipe_resource *transpose, *intermediate;
-      } individual;
-   } textures;
-
    struct pipe_transfer *tex_transfer;
    short *texels;
 };
 
 /* upload the idct matrix, which can be shared by all idct instances of a pipe */
-struct pipe_resource *vl_idct_upload_matrix(struct pipe_context *pipe);
+struct pipe_sampler_view *vl_idct_upload_matrix(struct pipe_context *pipe);
 
 /* init an idct instance */
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
                   unsigned blocks_x, unsigned blocks_y,
-                  int color_swizzle, struct pipe_resource *matrix);
+                  int color_swizzle, struct pipe_sampler_view *matrix);
 
 /* destroy an idct instance */
 void vl_idct_cleanup(struct vl_idct *idct);
 
 /* init a buffer assosiated with agiven idct instance */
-struct pipe_resource *vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
+bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
+                         struct pipe_sampler_view *source, struct pipe_surface *destination);
 
 /* cleanup a buffer of an idct instance */
 void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 7fd3a0377c9..004d79d7324 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -268,12 +268,13 @@ static struct pipe_video_buffer *
 vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
 {
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-   struct pipe_resource *y, *cr, *cb;
    struct vl_mpeg12_buffer *buffer;
 
    struct pipe_resource res_template, *resource;
    struct pipe_surface surf_template;
    struct pipe_sampler_view sv_template;
+   struct vl_ycbcr_sampler_views *idct_views;
+   struct vl_ycbcr_surfaces *idct_surfaces;
 
    assert(ctx);
 
@@ -329,22 +330,50 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
 
    buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, ctx->pipe,
                                                       ctx->vertex_buffer_size);
-   if (!(y = vl_idct_init_buffer(&ctx->idct_y, &buffer->idct_y))) {
+
+   if (!vl_ycbcr_buffer_init(&buffer->idct_source, ctx->pipe,
+                             ctx->buffer_width, ctx->buffer_height,
+                             ctx->base.chroma_format,
+                             PIPE_FORMAT_R16G16B16A16_SNORM,
+                             PIPE_USAGE_STREAM)) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   if (!vl_ycbcr_buffer_init(&buffer->idct_2_mc, ctx->pipe,
+                             ctx->buffer_width, ctx->buffer_height,
+                             ctx->base.chroma_format,
+                             PIPE_FORMAT_R16_SNORM,
+                             PIPE_USAGE_STATIC)) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   idct_views = vl_ycbcr_get_sampler_views(&buffer->idct_source);
+   idct_surfaces = vl_ycbcr_get_surfaces(&buffer->idct_2_mc);
+
+   if (!vl_idct_init_buffer(&ctx->idct_y, &buffer->idct_y,
+                            idct_views->y, idct_surfaces->y)) {
       FREE(buffer);
       return NULL;
    }
 
-   if (!(cr = vl_idct_init_buffer(&ctx->idct_cr, &buffer->idct_cr))) {
+   if (!vl_idct_init_buffer(&ctx->idct_cb, &buffer->idct_cb,
+                            idct_views->cb, idct_surfaces->cb)) {
       FREE(buffer);
       return NULL;
    }
 
-   if (!(cb = vl_idct_init_buffer(&ctx->idct_cb, &buffer->idct_cb))) {
+   if (!vl_idct_init_buffer(&ctx->idct_cr, &buffer->idct_cr,
+                            idct_views->cr, idct_surfaces->cr)) {
       FREE(buffer);
       return NULL;
    }
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc, y, cr, cb)) {
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc,
+                                buffer->idct_2_mc.resources.y,
+                                buffer->idct_2_mc.resources.cr,
+                                buffer->idct_2_mc.resources.cb)) {
       FREE(buffer);
       return NULL;
    }
@@ -572,7 +601,7 @@ static bool
 init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_height)
 {
    unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
-   struct pipe_resource *idct_matrix;
+   struct pipe_sampler_view *idct_matrix;
 
    /* TODO: Implement 422, 444 */
    assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index e4236adcec3..d1af5cd2ac3 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -32,6 +32,7 @@
 #include "vl_idct.h"
 #include "vl_mpeg12_mc_renderer.h"
 #include "vl_compositor.h"
+#include "vl_ycbcr_buffer.h"
 
 struct pipe_screen;
 struct pipe_context;
@@ -62,6 +63,10 @@ struct vl_mpeg12_context
 struct vl_mpeg12_buffer
 {
    struct pipe_video_buffer base;
+
+   struct vl_ycbcr_buffer idct_source;
+   struct vl_ycbcr_buffer idct_2_mc;
+
    struct pipe_surface *surface;
    struct pipe_sampler_view *sampler_view;
 
diff --git a/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c b/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
new file mode 100644
index 00000000000..56183891f9b
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
@@ -0,0 +1,191 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vl_ycbcr_buffer.h"
+#include <util/u_format.h>
+#include <util/u_inlines.h>
+#include <util/u_sampler.h>
+#include <pipe/p_screen.h>
+#include <pipe/p_context.h>
+#include <assert.h>
+
+bool vl_ycbcr_buffer_init(struct vl_ycbcr_buffer *buffer,
+                          struct pipe_context *pipe,
+                          unsigned width, unsigned height,
+                          enum pipe_video_chroma_format chroma_format,
+                          enum pipe_format resource_format,
+                          unsigned usage)
+{
+   struct pipe_resource templ;
+
+   assert(buffer && pipe);
+   /* zero everything so the sampler views and surfaces start out as NULL */
+   memset(buffer, 0, sizeof(struct vl_ycbcr_buffer));
+   buffer->pipe = pipe;
+   /* luma plane: texture width is the requested width divided by the format's component count */
+   memset(&templ, 0, sizeof(templ));
+   templ.target = PIPE_TEXTURE_2D;
+   templ.format = resource_format;
+   templ.width0 = width / util_format_get_nr_components(resource_format);
+   templ.height0 = height;
+   templ.depth0 = 1;
+   templ.array_size = 1;
+   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   templ.usage = usage;
+
+   buffer->resources.y = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources.y)
+      goto error_resource_y;
+   /* chroma planes: 4:2:0 halves both dimensions, 4:2:2 halves only the width */
+   if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      templ.width0 /= 2;
+      templ.height0 /= 2;
+   } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      templ.width0 /= 2;
+   }
+
+   buffer->resources.cb = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources.cb)
+      goto error_resource_cb;
+
+   buffer->resources.cr = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources.cr)
+      goto error_resource_cr;
+
+   return true;
+   /* on failure release the planes created so far, newest first */
+error_resource_cr:
+   pipe_resource_reference(&buffer->resources.cb, NULL);
+
+error_resource_cb:
+   pipe_resource_reference(&buffer->resources.y, NULL);
+
+error_resource_y:
+   return false;
+}
+
+struct vl_ycbcr_sampler_views *vl_ycbcr_get_sampler_views(struct vl_ycbcr_buffer *buffer)
+{
+   struct pipe_sampler_view sv_templ;
+   struct pipe_context *pipe;
+
+   assert(buffer);
+
+   pipe = buffer->pipe;
+
+   if (!buffer->sampler_views.y) {
+      memset(&sv_templ, 0, sizeof(sv_templ));
+      u_sampler_view_default_template(&sv_templ, buffer->resources.y, buffer->resources.y->format);
+      buffer->sampler_views.y = pipe->create_sampler_view(pipe, buffer->resources.y, &sv_templ);
+      if (!buffer->sampler_views.y)
+         goto error;
+   }
+
+   if (!buffer->sampler_views.cb) {
+      memset(&sv_templ, 0, sizeof(sv_templ));
+      u_sampler_view_default_template(&sv_templ, buffer->resources.cb, buffer->resources.cb->format);
+      buffer->sampler_views.cb = pipe->create_sampler_view(pipe, buffer->resources.cb, &sv_templ);
+      if (!buffer->sampler_views.cb)
+         goto error;
+   }
+
+   if (!buffer->sampler_views.cr) {
+      memset(&sv_templ, 0, sizeof(sv_templ));
+      u_sampler_view_default_template(&sv_templ, buffer->resources.cr, buffer->resources.cr->format);
+      buffer->sampler_views.cr = pipe->create_sampler_view(pipe, buffer->resources.cr, &sv_templ);
+      if (!buffer->sampler_views.cr)
+         goto error;
+   }
+
+   return &buffer->sampler_views;
+
+error:
+   pipe_sampler_view_reference(&buffer->sampler_views.y, NULL);
+   pipe_sampler_view_reference(&buffer->sampler_views.cb, NULL);
+   pipe_sampler_view_reference(&buffer->sampler_views.cr, NULL);
+   return NULL;
+}
+
+struct vl_ycbcr_surfaces *vl_ycbcr_get_surfaces(struct vl_ycbcr_buffer *buffer)
+{
+   struct pipe_surface surf_templ;
+   struct pipe_context *pipe;
+
+   assert(buffer);
+
+   pipe = buffer->pipe;
+
+   if (!buffer->surfaces.y) {
+      memset(&surf_templ, 0, sizeof(surf_templ));
+      surf_templ.format = buffer->resources.y->format;
+      surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+      buffer->surfaces.y = pipe->create_surface(pipe, buffer->resources.y, &surf_templ);
+      if (!buffer->surfaces.y)
+         goto error;
+   }
+
+   if (!buffer->surfaces.cb) {
+      memset(&surf_templ, 0, sizeof(surf_templ));
+      surf_templ.format = buffer->resources.cb->format;
+      surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+      buffer->surfaces.cb = pipe->create_surface(pipe, buffer->resources.cb, &surf_templ);
+      if (!buffer->surfaces.cb)
+         goto error;
+   }
+
+   if (!buffer->surfaces.cr) {
+      memset(&surf_templ, 0, sizeof(surf_templ));
+      surf_templ.format = buffer->resources.cr->format;
+      surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+      buffer->surfaces.cr = pipe->create_surface(pipe, buffer->resources.cr, &surf_templ);
+      if (!buffer->surfaces.cr)
+         goto error;
+   }
+
+   return &buffer->surfaces;
+
+error:
+   pipe_surface_reference(&buffer->surfaces.y, NULL);
+   pipe_surface_reference(&buffer->surfaces.cb, NULL);
+   pipe_surface_reference(&buffer->surfaces.cr, NULL);
+   return NULL;
+}
+
+void vl_ycbcr_buffer_cleanup(struct vl_ycbcr_buffer *buffer)
+{
+   pipe_surface_reference(&buffer->surfaces.y, NULL);
+   pipe_surface_reference(&buffer->surfaces.cb, NULL);
+   pipe_surface_reference(&buffer->surfaces.cr, NULL);
+
+   pipe_sampler_view_reference(&buffer->sampler_views.y, NULL);
+   pipe_sampler_view_reference(&buffer->sampler_views.cb, NULL);
+   pipe_sampler_view_reference(&buffer->sampler_views.cr, NULL);
+
+   pipe_resource_reference(&buffer->resources.y, NULL);
+   pipe_resource_reference(&buffer->resources.cb, NULL);
+   pipe_resource_reference(&buffer->resources.cr, NULL);
+}
diff --git a/src/gallium/auxiliary/vl/vl_ycbcr_buffer.h b/src/gallium/auxiliary/vl/vl_ycbcr_buffer.h
new file mode 100644
index 00000000000..a116ed2a882
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_ycbcr_buffer.h
@@ -0,0 +1,89 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_ycbcr_buffer_h
+#define vl_ycbcr_buffer_h
+
+#include <pipe/p_state.h>
+
+/**
+ * implementation of a planar ycbcr buffer
+ */
+
+/* resources of a buffer */
+struct vl_ycbcr_resources
+{
+   struct pipe_resource *y, *cb, *cr;
+};
+
+/* sampler views of a buffer */
+struct vl_ycbcr_sampler_views
+{
+   struct pipe_sampler_view *y, *cb, *cr;
+};
+
+/* surfaces of a buffer */
+struct vl_ycbcr_surfaces
+{
+   struct pipe_surface *y, *cb, *cr;
+};
+
+/* planar buffer for vl data upload and manipulation */
+struct vl_ycbcr_buffer
+{
+   struct pipe_context           *pipe;
+   struct vl_ycbcr_resources     resources;
+   struct vl_ycbcr_sampler_views sampler_views;
+   struct vl_ycbcr_surfaces      surfaces;
+};
+
+/**
+ * initialize a buffer, creating its resources
+ */
+bool vl_ycbcr_buffer_init(struct vl_ycbcr_buffer *buffer,
+                          struct pipe_context *pipe,
+                          unsigned width, unsigned height,
+                          enum pipe_video_chroma_format chroma_format,
+                          enum pipe_format resource_format,
+                          unsigned usage);
+
+/**
+ * create default sampler views for the buffer on demand
+ */
+struct vl_ycbcr_sampler_views *vl_ycbcr_get_sampler_views(struct vl_ycbcr_buffer *buffer);
+
+/**
+ * create default surfaces for the buffer on demand
+ */
+struct vl_ycbcr_surfaces *vl_ycbcr_get_surfaces(struct vl_ycbcr_buffer *buffer);
+
+/**
+ * cleanup the buffer destroying all its resources
+ */
+void vl_ycbcr_buffer_cleanup(struct vl_ycbcr_buffer *buffer);
+
+#endif
-- 
cgit v1.2.3


From f3c9161b15988e90a2f727c8260ac3bff41912fd Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 27 Mar 2011 20:41:43 +0200
Subject: [g3dvl] remove texture dependencies from mc code

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c     |  4 ++--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 22 ++++++++--------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  8 +-------
 3 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 004d79d7324..6388b3e23cb 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -372,8 +372,8 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
 
    if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc,
                                 buffer->idct_2_mc.resources.y,
-                                buffer->idct_2_mc.resources.cr,
-                                buffer->idct_2_mc.resources.cb)) {
+                                buffer->idct_2_mc.resources.cb,
+                                buffer->idct_2_mc.resources.cr)) {
       FREE(buffer);
       return NULL;
    }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 60d61abbf2a..d2ed7470598 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -539,30 +539,29 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 
 bool
 vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                         struct pipe_resource *y, struct pipe_resource *cr, struct pipe_resource *cb)
+                         struct pipe_resource *y, struct pipe_resource *cb, struct pipe_resource *cr)
 {
    struct pipe_sampler_view sampler_view;
+   struct pipe_resource *res[3];
 
    unsigned i;
 
    assert(renderer && buffer);
    assert(y && cb && cr);
 
-   pipe_resource_reference(&buffer->textures.individual.y, y);
-   pipe_resource_reference(&buffer->textures.individual.cr, cr);
-   pipe_resource_reference(&buffer->textures.individual.cb, cb);
+   res[0] = y;
+   res[1] = cb;
+   res[2] = cr;
 
    for (i = 0; i < 3; ++i) {
       memset(&sampler_view, 0, sizeof(sampler_view));
-      u_sampler_view_default_template(&sampler_view,
-                                      buffer->textures.all[i],
-                                      buffer->textures.all[i]->format);
+      u_sampler_view_default_template(&sampler_view, res[i], res[i]->format);
       sampler_view.swizzle_r = i == 0 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
       sampler_view.swizzle_g = i == 1 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
       sampler_view.swizzle_b = i == 2 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
       sampler_view.swizzle_a = PIPE_SWIZZLE_ONE;
       buffer->sampler_views.all[i] = renderer->pipe->create_sampler_view(
-         renderer->pipe, buffer->textures.all[i], &sampler_view);
+         renderer->pipe, res[i], &sampler_view);
       if (!buffer->sampler_views.all[i])
          goto error_samplers;
    }
@@ -570,10 +569,8 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    return true;
 
 error_samplers:
-   for (i = 0; i < 3; ++i) {
+   for (i = 0; i < 3; ++i)
       pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
-      pipe_resource_reference(&buffer->textures.all[i], NULL);
-   }
 
    return false;
 }
@@ -587,9 +584,6 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
 
    for (i = 0; i < 5; ++i)
       pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
-
-   for (i = 0; i < 3; ++i)
-      pipe_resource_reference(&buffer->textures.all[i], NULL);
 }
 
 void
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 29a548ef70d..0bb17ef487b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -67,12 +67,6 @@ struct vl_mpeg12_mc_buffer
       struct pipe_sampler_view *all[5];
       struct { struct pipe_sampler_view *y, *cb, *cr, *ref[2]; } individual;
    } sampler_views;
-
-   union
-   {
-      struct pipe_resource *all[3];
-      struct { struct pipe_resource *y, *cb, *cr; } individual;
-   } textures;
 };
 
 bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
@@ -84,7 +78,7 @@ bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
 bool vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                              struct pipe_resource *y, struct pipe_resource *cr, struct pipe_resource *cb);
+                              struct pipe_resource *y, struct pipe_resource *cb, struct pipe_resource *cr);
 
 void vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer);
 
-- 
cgit v1.2.3


From 1f3a85ec7931c5d67fce0ec1e845d6c91048e599 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 29 Mar 2011 20:01:49 +0200
Subject: [g3dvl] let mc code work on the different color planes seperately

---
 src/gallium/auxiliary/vl/vl_compositor.c         |  44 ++++---
 src/gallium/auxiliary/vl/vl_compositor.h         |   4 +-
 src/gallium/auxiliary/vl/vl_mpeg12_context.c     | 158 ++++++++++++++---------
 src/gallium/auxiliary/vl/vl_mpeg12_context.h     |  11 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 120 +++++++----------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  13 +-
 src/gallium/auxiliary/vl/vl_ycbcr_buffer.c       |  16 ++-
 7 files changed, 188 insertions(+), 178 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index b0e0b3bfa72..f62706e5066 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -104,8 +104,8 @@ create_frag_shader_ycbcr_2_rgb(struct vl_compositor *c)
 {
    struct ureg_program *shader;
    struct ureg_src tc;
-   struct ureg_src csc[4];
-   struct ureg_src sampler;
+   struct ureg_src csc[3];
+   struct ureg_src sampler[3];
    struct ureg_dst texel;
    struct ureg_dst fragment;
    unsigned i;
@@ -115,19 +115,25 @@ create_frag_shader_ycbcr_2_rgb(struct vl_compositor *c)
       return false;
 
    tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
-   for (i = 0; i < 4; ++i)
+   for (i = 0; i < 3; ++i) {
       csc[i] = ureg_DECL_constant(shader, i);
-   sampler = ureg_DECL_sampler(shader, 0);
+      sampler[i] = ureg_DECL_sampler(shader, i);
+   }
    texel = ureg_DECL_temporary(shader);
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
-    * texel = tex(tc, sampler)
+    * texel.xyz = tex(tc, sampler[i])
     * fragment = csc * texel
     */
-   ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
+   for (i = 0; i < 3; ++i)
+      ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, tc, sampler[i]);
+
+   ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
+
    for (i = 0; i < 3; ++i)
       ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
+
    ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
 
    ureg_release_temporary(shader, texel);
@@ -531,7 +537,7 @@ static unsigned gen_data(struct vl_compositor *c,
 }
 
 static void draw_layers(struct vl_compositor *c,
-                        struct pipe_sampler_view *src_surface,
+                        struct vl_ycbcr_sampler_views *src_sampler,
                         struct pipe_video_rect *src_rect,
                         struct pipe_video_rect *dst_rect)
 {
@@ -541,37 +547,39 @@ static void draw_layers(struct vl_compositor *c,
    unsigned i;
 
    assert(c);
-   assert(src_surface);
+   assert(src_sampler);
    assert(src_rect);
    assert(dst_rect);
 
-   num_rects = gen_data(c, src_surface, src_rect, dst_rect, surfaces, frag_shaders);
+   num_rects = gen_data(c, src_sampler->y, src_rect, dst_rect, surfaces, frag_shaders);
 
    c->pipe->bind_blend_state(c->pipe, c->blend);
    for (i = 0; i < num_rects; ++i) {
       c->pipe->bind_fs_state(c->pipe, frag_shaders[i]);
-      c->pipe->set_fragment_sampler_views(c->pipe, surfaces[i][1] ? 2 : 1, &surfaces[i][0]);
+      if (i == 0) {
+         c->pipe->set_fragment_sampler_views(c->pipe, 3, &src_sampler->y);
+      } else {
+         c->pipe->set_fragment_sampler_views(c->pipe, surfaces[i][1] ? 2 : 1, &surfaces[i][0]);
+      }
 
       util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, i * 4, 4);
    }
 }
 
 void vl_compositor_render(struct vl_compositor          *compositor,
-                          struct pipe_sampler_view      *src_surface,
-                          enum pipe_mpeg12_picture_type picture_type,
+                          struct vl_ycbcr_sampler_views *src_sampler,
                           struct pipe_video_rect        *src_area,
                           struct pipe_surface           *dst_surface,
                           struct pipe_video_rect        *dst_area,
                           struct pipe_fence_handle      **fence)
 {
-   void *samplers[2];
+   void *samplers[3];
 
    assert(compositor);
-   assert(src_surface);
+   assert(src_sampler);
    assert(src_area);
    assert(dst_surface);
    assert(dst_area);
-   assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
 
    if (compositor->fb_state.width != dst_surface->width) {
       compositor->fb_inv_size.x = 1.0f / dst_surface->width;
@@ -593,17 +601,17 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    compositor->viewport.translate[2] = 0;
    compositor->viewport.translate[3] = 0;
 
-   samplers[0] = samplers[1] = compositor->sampler;
+   samplers[0] = samplers[1] = samplers[2] = compositor->sampler;
 
    compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state);
    compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport);
-   compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 2, &samplers[0]);
+   compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 3, &samplers[0]);
    compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
    compositor->pipe->set_vertex_buffers(compositor->pipe, 1, &compositor->vertex_buf);
    compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems_state);
    compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
 
-   draw_layers(compositor, src_surface, src_area, dst_area);
+   draw_layers(compositor, src_sampler, src_area, dst_area);
 
    assert(!compositor->dirty_layers);
    compositor->pipe->flush(compositor->pipe, fence);
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 249eb685b40..e10a663f860 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -32,6 +32,7 @@
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
 #include "vl_types.h"
+#include "vl_ycbcr_buffer.h"
 
 struct pipe_context;
 struct keymap;
@@ -78,8 +79,7 @@ void vl_compositor_set_layers(struct vl_compositor *compositor,
                               unsigned num_layers);
 
 void vl_compositor_render(struct vl_compositor          *compositor,
-                          struct pipe_sampler_view      *src_surface,
-                          enum pipe_mpeg12_picture_type picture_type,
+                          struct vl_ycbcr_sampler_views *src_sampler,
                           struct pipe_video_rect        *src_area,
                           struct pipe_surface           *dst_surface,
                           struct pipe_video_rect        *dst_area,
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 6388b3e23cb..94ac70e2126 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -89,13 +89,16 @@ vl_mpeg12_buffer_destroy(struct pipe_video_buffer *buffer)
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)buf->base.context;
    assert(buf && ctx);
 
+   vl_ycbcr_buffer_cleanup(&buf->idct_source);
+   vl_ycbcr_buffer_cleanup(&buf->idct_2_mc);
+   vl_ycbcr_buffer_cleanup(&buf->render_result);
    vl_vb_cleanup(&buf->vertex_stream);
    vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y);
    vl_idct_cleanup_buffer(&ctx->idct_cb, &buf->idct_cb);
    vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr);
-   vl_mpeg12_mc_cleanup_buffer(&buf->mc);
-   pipe_surface_reference(&buf->surface, NULL);
-   pipe_sampler_view_reference(&buf->sampler_view, NULL);
+   vl_mpeg12_mc_cleanup_buffer(&buf->mc_y);
+   vl_mpeg12_mc_cleanup_buffer(&buf->mc_cb);
+   vl_mpeg12_mc_cleanup_buffer(&buf->mc_cr);
 
    FREE(buf);
 }
@@ -166,6 +169,10 @@ vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
    struct vl_mpeg12_buffer *past = (struct vl_mpeg12_buffer *)refs[0];
    struct vl_mpeg12_buffer *future = (struct vl_mpeg12_buffer *)refs[1];
 
+   struct vl_ycbcr_surfaces *surfaces;
+   struct vl_ycbcr_sampler_views *sv_past;
+   struct vl_ycbcr_sampler_views *sv_future;
+
    struct pipe_sampler_view *sv_refs[2];
    unsigned ne_start, ne_num, e_start, e_num;
    struct vl_mpeg12_context *ctx;
@@ -184,13 +191,28 @@ vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
    vl_idct_flush(&ctx->idct_cr, &buf->idct_cr, ne_num);
    vl_idct_flush(&ctx->idct_cb, &buf->idct_cb, ne_num);
 
-   sv_refs[0] = past ? past->sampler_view : NULL;
-   sv_refs[1] = future ? future->sampler_view : NULL;
+   surfaces = vl_ycbcr_get_surfaces(&buf->render_result);
+
+   sv_past = past ? vl_ycbcr_get_sampler_views(&past->render_result) : NULL;
+   sv_future = future ? vl_ycbcr_get_sampler_views(&future->render_result) : NULL;
+
+   sv_refs[0] = sv_past ? sv_past->y : NULL;
+   sv_refs[1] = sv_future ? sv_future->y : NULL;
+
+   vl_mpeg12_mc_renderer_flush(&ctx->mc_y, &buf->mc_y, surfaces->y,
+                               sv_refs, ne_start, ne_num, e_start, e_num, fence);
 
-   vl_mpeg12_mc_renderer_flush(&ctx->mc_renderer, &buf->mc,
-                               buf->surface, sv_refs,
-                               ne_start, ne_num, e_start, e_num,
-                               fence);
+   sv_refs[0] = sv_past ? sv_past->cb : NULL;
+   sv_refs[1] = sv_future ? sv_future->cb : NULL;
+
+   vl_mpeg12_mc_renderer_flush(&ctx->mc_cb, &buf->mc_cb, surfaces->cb,
+                               sv_refs, ne_start, ne_num, e_start, e_num, fence);
+
+   sv_refs[0] = sv_past ? sv_past->cr : NULL;
+   sv_refs[1] = sv_future ? sv_future->cr : NULL;
+
+   vl_mpeg12_mc_renderer_flush(&ctx->mc_cr, &buf->mc_cr, surfaces->cr,
+                               sv_refs, ne_start, ne_num, e_start, e_num, fence);
 }
 
 static void
@@ -209,7 +231,9 @@ vl_mpeg12_destroy(struct pipe_video_context *vpipe)
    ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
 
    vl_compositor_cleanup(&ctx->compositor);
-   vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+   vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y);
+   vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb);
+   vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr);
    vl_idct_cleanup(&ctx->idct_y);
    vl_idct_cleanup(&ctx->idct_cr);
    vl_idct_cleanup(&ctx->idct_cb);
@@ -270,10 +294,7 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
    struct vl_mpeg12_buffer *buffer;
 
-   struct pipe_resource res_template, *resource;
-   struct pipe_surface surf_template;
-   struct pipe_sampler_view sv_template;
-   struct vl_ycbcr_sampler_views *idct_views;
+   struct vl_ycbcr_sampler_views *idct_views, *mc_views;
    struct vl_ycbcr_surfaces *idct_surfaces;
 
    assert(ctx);
@@ -289,41 +310,6 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
    buffer->base.unmap = vl_mpeg12_buffer_unmap;
    buffer->base.flush = vl_mpeg12_buffer_flush;
 
-   memset(&res_template, 0, sizeof(res_template));
-   res_template.target = PIPE_TEXTURE_2D;
-   res_template.format = ctx->decode_format;
-   res_template.last_level = 0;
-   res_template.width0 = ctx->buffer_width;
-   res_template.height0 = ctx->buffer_height;
-   res_template.depth0 = 1;
-   res_template.array_size = 1;
-   res_template.usage = PIPE_USAGE_DEFAULT;
-   res_template.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   res_template.flags = 0;
-   resource = ctx->pipe->screen->resource_create(ctx->pipe->screen, &res_template);
-   if (!resource) {
-      FREE(buffer);
-      return NULL;
-   }
-
-   memset(&surf_template, 0, sizeof(surf_template));
-   surf_template.format = resource->format;
-   surf_template.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   buffer->surface = ctx->pipe->create_surface(ctx->pipe, resource, &surf_template);
-   if (!buffer->surface) {
-      FREE(buffer);
-      return NULL;
-   }
-
-   u_sampler_view_default_template(&sv_template, resource, resource->format);
-   buffer->sampler_view = ctx->pipe->create_sampler_view(ctx->pipe, resource, &sv_template);
-   if (!buffer->sampler_view) {
-      FREE(buffer);
-      return NULL;
-   }
-
-   pipe_resource_reference(&resource, NULL);
-
    buffer->vertex_bufs.individual.quad.stride = ctx->quads.stride;
    buffer->vertex_bufs.individual.quad.buffer_offset = ctx->quads.buffer_offset;
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, ctx->quads.buffer);
@@ -349,6 +335,15 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
       return NULL;
    }
 
+   if (!vl_ycbcr_buffer_init(&buffer->render_result, ctx->pipe,
+                             ctx->buffer_width, ctx->buffer_height,
+                             ctx->base.chroma_format,
+                             PIPE_FORMAT_R8_SNORM,
+                             PIPE_USAGE_STATIC)) {
+      FREE(buffer);
+      return NULL;
+   }
+
    idct_views = vl_ycbcr_get_sampler_views(&buffer->idct_source);
    idct_surfaces = vl_ycbcr_get_surfaces(&buffer->idct_2_mc);
 
@@ -370,10 +365,19 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
       return NULL;
    }
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc_renderer, &buffer->mc,
-                                buffer->idct_2_mc.resources.y,
-                                buffer->idct_2_mc.resources.cb,
-                                buffer->idct_2_mc.resources.cr)) {
+   mc_views = vl_ycbcr_get_sampler_views(&buffer->idct_2_mc);
+
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc_y, &buffer->mc_y, mc_views->y)) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc_cb, &buffer->mc_cb, mc_views->cb)) {
+      FREE(buffer);
+      return NULL;
+   }
+
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc_cr, &buffer->mc_cr, mc_views->cr)) {
       FREE(buffer);
       return NULL;
    }
@@ -477,6 +481,7 @@ vl_mpeg12_render_picture(struct pipe_video_context     *vpipe,
 {
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)src_surface;
+   struct vl_ycbcr_sampler_views *sampler_views;
 
    assert(vpipe);
    assert(src_surface);
@@ -484,8 +489,9 @@ vl_mpeg12_render_picture(struct pipe_video_context     *vpipe,
    assert(dst_surface);
    assert(dst_area);
 
-   vl_compositor_render(&ctx->compositor, buf->sampler_view,
-                        picture_type, src_area,
+   sampler_views = vl_ycbcr_get_sampler_views(&buf->render_result);
+
+   vl_compositor_render(&ctx->compositor, sampler_views, src_area,
                         dst_surface, dst_area, fence);
 }
 
@@ -631,14 +637,14 @@ init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_
       chroma_blocks_y = 2;
    }
 
-   if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
-      return false;
-
    if(!vl_idct_init(&ctx->idct_cb, ctx->pipe, chroma_width, chroma_height,
                     chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
       return false;
 
+   if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height,
+                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
+      return false;
+
    return true;
 }
 
@@ -701,9 +707,31 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       return NULL;
    }
 
-   if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
+   if (!vl_mpeg12_mc_renderer_init(&ctx->mc_y, ctx->pipe,
+                                   ctx->buffer_width, ctx->buffer_height,
+                                   chroma_format, TGSI_SWIZZLE_X)) {
+      vl_idct_cleanup(&ctx->idct_y);
+      vl_idct_cleanup(&ctx->idct_cr);
+      vl_idct_cleanup(&ctx->idct_cb);
+      ctx->pipe->destroy(ctx->pipe);
+      FREE(ctx);
+      return NULL;
+   }
+
+   if (!vl_mpeg12_mc_renderer_init(&ctx->mc_cb, ctx->pipe,
+                                   ctx->buffer_width, ctx->buffer_height,
+                                   chroma_format, TGSI_SWIZZLE_Y)) {
+      vl_idct_cleanup(&ctx->idct_y);
+      vl_idct_cleanup(&ctx->idct_cr);
+      vl_idct_cleanup(&ctx->idct_cb);
+      ctx->pipe->destroy(ctx->pipe);
+      FREE(ctx);
+      return NULL;
+   }
+
+   if (!vl_mpeg12_mc_renderer_init(&ctx->mc_cr, ctx->pipe,
                                    ctx->buffer_width, ctx->buffer_height,
-                                   chroma_format)) {
+                                   chroma_format, TGSI_SWIZZLE_Z)) {
       vl_idct_cleanup(&ctx->idct_y);
       vl_idct_cleanup(&ctx->idct_cr);
       vl_idct_cleanup(&ctx->idct_cb);
@@ -716,7 +744,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       vl_idct_cleanup(&ctx->idct_y);
       vl_idct_cleanup(&ctx->idct_cr);
       vl_idct_cleanup(&ctx->idct_cb);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y);
+      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb);
+      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
@@ -726,7 +756,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       vl_idct_cleanup(&ctx->idct_y);
       vl_idct_cleanup(&ctx->idct_cr);
       vl_idct_cleanup(&ctx->idct_cb);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
+      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y);
+      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb);
+      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr);
       vl_compositor_cleanup(&ctx->compositor);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index d1af5cd2ac3..f84e23508ed 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -51,8 +51,8 @@ struct vl_mpeg12_context
    unsigned vertex_buffer_size;
    void *vertex_elems_state;
 
-   struct vl_idct idct_y, idct_cr, idct_cb;
-   struct vl_mpeg12_mc_renderer mc_renderer;
+   struct vl_idct idct_y, idct_cb, idct_cr;
+   struct vl_mpeg12_mc_renderer mc_y, mc_cb, mc_cr;
    struct vl_compositor compositor;
 
    void *rast;
@@ -66,9 +66,7 @@ struct vl_mpeg12_buffer
 
    struct vl_ycbcr_buffer idct_source;
    struct vl_ycbcr_buffer idct_2_mc;
-
-   struct pipe_surface *surface;
-   struct pipe_sampler_view *sampler_view;
+   struct vl_ycbcr_buffer render_result;
 
    struct vl_vertex_buffer vertex_stream;
 
@@ -81,8 +79,7 @@ struct vl_mpeg12_buffer
    } vertex_bufs;
 
    struct vl_idct_buffer idct_y, idct_cb, idct_cr;
-
-   struct vl_mpeg12_mc_buffer mc;
+   struct vl_mpeg12_mc_buffer mc_y, mc_cb, mc_cr;
 };
 
 /* drivers can call this function in their pipe_video_context constructors and pass it
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index d2ed7470598..7c3fea557ab 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -217,11 +217,11 @@ calc_field(struct ureg_program *shader)
 }
 
 static struct ureg_dst
-fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
+fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field, unsigned color_swizzle)
 {
-   struct ureg_src tc[3], sampler[3], eb[2];
+   struct ureg_src tc[3], sampler, eb[2];
    struct ureg_dst texel, t_tc, t_eb_info;
-   unsigned i, label;
+   unsigned label;
 
    texel = ureg_DECL_temporary(shader);
    t_tc = ureg_DECL_temporary(shader);
@@ -234,9 +234,10 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
    eb[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0, TGSI_INTERPOLATE_CONSTANT);
    eb[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1, TGSI_INTERPOLATE_CONSTANT);
 
-   for (i = 0; i < 3; ++i)  {
-      sampler[i] = ureg_DECL_sampler(shader, i);
-   }
+   //for (i = 0; i < 3; ++i)  {
+   //   sampler[i] = ureg_DECL_sampler(shader, i);
+   //}
+   sampler = ureg_DECL_sampler(shader, 0);
 
    /*
     * texel.y  = tex(field.y ? tc[1] : tc[0], sampler[0])
@@ -256,19 +257,16 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
    ureg_SLT(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), ureg_src(t_eb_info), ureg_imm1f(shader, 0.5f));
 
    ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.0f));
-   for (i = 0; i < 3; ++i) {
-      ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), TGSI_SWIZZLE_X + i), &label);
+   ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), color_swizzle), &label);
 
-         /* Nouveau can't writemask tex dst regs (yet?), so this won't work anymore on nvidia hardware */
-         if(i==0 || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
-            ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, ureg_src(t_tc), sampler[i]);
-         } else {
-            ureg_TEX(shader, ureg_writemask(texel, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, tc[2], sampler[i]);
-         }
+      if(color_swizzle==TGSI_SWIZZLE_X || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
+         ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
+      } else {
+         ureg_TEX(shader, texel, TGSI_TEXTURE_3D, tc[2], sampler);
+      }
 
-      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-      ureg_ENDIF(shader);
-   }
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ENDIF(shader);
 
    ureg_release_temporary(shader, t_tc);
    ureg_release_temporary(shader, t_eb_info);
@@ -290,13 +288,13 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
       tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
 
    for (i = 0; i < 2; ++i) {
-      sampler[i] = ureg_DECL_sampler(shader, i + 3);
+      sampler[i] = ureg_DECL_sampler(shader, i + 1);
       ref[i] = ureg_DECL_temporary(shader);
    }
 
    result = ureg_DECL_temporary(shader);
 
-   ureg_MOV(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.5f));
+   ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
 
    ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &intra_label);
       /*
@@ -316,7 +314,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
       ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
       ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
 
-      ureg_LRP(shader, ureg_writemask(result, TGSI_WRITEMASK_XYZ),
+      ureg_LRP(shader, result,
                ureg_scalar(info, TGSI_SWIZZLE_Y),
                ureg_src(ref[1]), ureg_src(ref[0]));
 
@@ -330,7 +328,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 }
 
 static void *
-create_frag_shader(struct vl_mpeg12_mc_renderer *r)
+create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned color_swizzle)
 {
    struct ureg_program *shader;
    struct ureg_dst result;
@@ -344,11 +342,11 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r)
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    field = calc_field(shader);
-   texel = fetch_ycbcr(r, shader, field);
+   texel = fetch_ycbcr(r, shader, field, color_swizzle);
 
    result = fetch_ref(shader, field);
 
-   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(texel), ureg_src(result));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(result));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -363,13 +361,11 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_sampler_state sampler;
    struct pipe_rasterizer_state rs_state;
-   unsigned filters[5];
+   unsigned filters[3];
    unsigned i;
 
    assert(r);
 
-   r->viewport.scale[0] = r->buffer_width;
-   r->viewport.scale[1] = r->buffer_height;
    r->viewport.scale[2] = 1;
    r->viewport.scale[3] = 1;
    r->viewport.translate[0] = 0;
@@ -377,27 +373,17 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    r->viewport.translate[2] = 0;
    r->viewport.translate[3] = 0;
 
-   r->fb_state.width = r->buffer_width;
-   r->fb_state.height = r->buffer_height;
    r->fb_state.nr_cbufs = 1;
    r->fb_state.zsbuf = NULL;
 
-   /* Luma filter */
+   /* source filter */
    filters[0] = PIPE_TEX_FILTER_NEAREST;
-   /* Chroma filters */
-   if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444 || true) { //TODO
-      filters[1] = PIPE_TEX_FILTER_NEAREST;
-      filters[2] = PIPE_TEX_FILTER_NEAREST;
-   }
-   else {
-      filters[1] = PIPE_TEX_FILTER_LINEAR;
-      filters[2] = PIPE_TEX_FILTER_LINEAR;
-   }
+
    /* Fwd, bkwd ref filters */
-   filters[3] = PIPE_TEX_FILTER_LINEAR;
-   filters[4] = PIPE_TEX_FILTER_LINEAR;
+   filters[1] = PIPE_TEX_FILTER_LINEAR;
+   filters[2] = PIPE_TEX_FILTER_LINEAR;
 
-   for (i = 0; i < 5; ++i) {
+   for (i = 0; i < 3; ++i) {
       memset(&sampler, 0, sizeof(sampler));
       sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
       sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
@@ -448,7 +434,7 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
 
    assert(r);
 
-   for (i = 0; i < 5; ++i)
+   for (i = 0; i < 3; ++i)
       r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
 
    r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
@@ -459,7 +445,8 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            struct pipe_context *pipe,
                            unsigned buffer_width,
                            unsigned buffer_height,
-                           enum pipe_video_chroma_format chroma_format)
+                           enum pipe_video_chroma_format chroma_format,
+                           unsigned color_swizzle)
 {
    struct pipe_resource tex_templ, *tex_dummy;
    struct pipe_sampler_view sampler_view;
@@ -481,7 +468,7 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (!renderer->vs)
       goto error_vs_shaders;
 
-   renderer->fs = create_frag_shader(renderer);
+   renderer->fs = create_frag_shader(renderer, color_swizzle);
    if (!renderer->fs)
       goto error_fs_shaders;
 
@@ -539,40 +526,14 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 
 bool
 vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                         struct pipe_resource *y, struct pipe_resource *cb, struct pipe_resource *cr)
+                         struct pipe_sampler_view *source)
 {
-   struct pipe_sampler_view sampler_view;
-   struct pipe_resource *res[3];
-
-   unsigned i;
-
    assert(renderer && buffer);
-   assert(y && cb && cr);
+   assert(source);
 
-   res[0] = y;
-   res[1] = cb;
-   res[2] = cr;
-
-   for (i = 0; i < 3; ++i) {
-      memset(&sampler_view, 0, sizeof(sampler_view));
-      u_sampler_view_default_template(&sampler_view, res[i], res[i]->format);
-      sampler_view.swizzle_r = i == 0 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
-      sampler_view.swizzle_g = i == 1 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
-      sampler_view.swizzle_b = i == 2 ? PIPE_SWIZZLE_RED : PIPE_SWIZZLE_ZERO;
-      sampler_view.swizzle_a = PIPE_SWIZZLE_ONE;
-      buffer->sampler_views.all[i] = renderer->pipe->create_sampler_view(
-         renderer->pipe, res[i], &sampler_view);
-      if (!buffer->sampler_views.all[i])
-         goto error_samplers;
-   }
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
 
    return true;
-
-error_samplers:
-   for (i = 0; i < 3; ++i)
-      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
-
-   return false;
 }
 
 void
@@ -582,7 +543,7 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
 
    assert(buffer);
 
-   for (i = 0; i < 5; ++i)
+   for (i = 0; i < 3; ++i)
       pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
 }
 
@@ -599,7 +560,14 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    if (not_empty_num_instances == 0 && empty_num_instances == 0)
       return;
 
+   renderer->viewport.scale[0] = surface->width;
+   renderer->viewport.scale[1] = surface->height;
+
+   renderer->fb_state.width = surface->width;
+   renderer->fb_state.height = surface->height;
    renderer->fb_state.cbufs[0] = surface;
+
+
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
@@ -610,8 +578,8 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    pipe_sampler_view_reference(&buffer->sampler_views.individual.ref[1],
                                ref[1] ? ref[1] : renderer->dummy);
 
-   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 5, buffer->sampler_views.all);
-   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 5, renderer->samplers.all);
+   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 3, buffer->sampler_views.all);
+   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 3, renderer->samplers.all);
 
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 0bb17ef487b..0a204d637b0 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -53,8 +53,8 @@ struct vl_mpeg12_mc_renderer
 
    union
    {
-      void *all[5];
-      struct { void *y, *cb, *cr, *ref[2]; } individual;
+      void *all[3];
+      struct { void *source, *ref[2]; } individual;
    } samplers;
 
    struct pipe_sampler_view *dummy;
@@ -64,8 +64,8 @@ struct vl_mpeg12_mc_buffer
 {
    union
    {
-      struct pipe_sampler_view *all[5];
-      struct { struct pipe_sampler_view *y, *cb, *cr, *ref[2]; } individual;
+      struct pipe_sampler_view *all[3];
+      struct { struct pipe_sampler_view *source, *ref[2]; } individual;
    } sampler_views;
 };
 
@@ -73,12 +73,13 @@ bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                                 struct pipe_context *pipe,
                                 unsigned picture_width,
                                 unsigned picture_height,
-                                enum pipe_video_chroma_format chroma_format);
+                                enum pipe_video_chroma_format chroma_format,
+                                unsigned color_swizzle);
 
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
 bool vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                              struct pipe_resource *y, struct pipe_resource *cb, struct pipe_resource *cr);
+                              struct pipe_sampler_view *source);
 
 void vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer);
 
diff --git a/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c b/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
index 56183891f9b..846e1cda1ae 100644
--- a/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
@@ -97,25 +97,29 @@ struct vl_ycbcr_sampler_views *vl_ycbcr_get_sampler_views(struct vl_ycbcr_buffer
 
    pipe = buffer->pipe;
 
+   memset(&sv_templ, 0, sizeof(sv_templ));
+   u_sampler_view_default_template(&sv_templ, buffer->resources.y, buffer->resources.y->format);
+
+   // Workaround
+   if (util_format_get_nr_components(buffer->resources.y->format) == 1) {
+      sv_templ.swizzle_r = PIPE_SWIZZLE_RED;
+      sv_templ.swizzle_g = PIPE_SWIZZLE_RED;
+      sv_templ.swizzle_b = PIPE_SWIZZLE_RED;
+   }
+
    if (!buffer->sampler_views.y) {
-      memset(&sv_templ, 0, sizeof(sv_templ));
-      u_sampler_view_default_template(&sv_templ, buffer->resources.y, buffer->resources.y->format);
       buffer->sampler_views.y = pipe->create_sampler_view(pipe, buffer->resources.y, &sv_templ);
       if (!buffer->sampler_views.y)
          goto error;
    }
 
    if (!buffer->sampler_views.cb) {
-      memset(&sv_templ, 0, sizeof(sv_templ));
-      u_sampler_view_default_template(&sv_templ, buffer->resources.cb, buffer->resources.cb->format);
       buffer->sampler_views.cb = pipe->create_sampler_view(pipe, buffer->resources.cb, &sv_templ);
       if (!buffer->sampler_views.cb)
          goto error;
    }
 
    if (!buffer->sampler_views.cr) {
-      memset(&sv_templ, 0, sizeof(sv_templ));
-      u_sampler_view_default_template(&sv_templ, buffer->resources.cr, buffer->resources.cr->format);
       buffer->sampler_views.cr = pipe->create_sampler_view(pipe, buffer->resources.cr, &sv_templ);
       if (!buffer->sampler_views.cr)
          goto error;
-- 
cgit v1.2.3


From 4de5d81638a79fbd74eca63723f6f09727bf4b60 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 1 Apr 2011 23:47:20 +0200
Subject: [g3dvl] some minor cleanup

---
 src/gallium/auxiliary/vl/vl_ycbcr_buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c b/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
index 846e1cda1ae..c67cec69723 100644
--- a/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
@@ -100,11 +100,11 @@ struct vl_ycbcr_sampler_views *vl_ycbcr_get_sampler_views(struct vl_ycbcr_buffer
    memset(&sv_templ, 0, sizeof(sv_templ));
    u_sampler_view_default_template(&sv_templ, buffer->resources.y, buffer->resources.y->format);
 
-   // Workaround
    if (util_format_get_nr_components(buffer->resources.y->format) == 1) {
       sv_templ.swizzle_r = PIPE_SWIZZLE_RED;
       sv_templ.swizzle_g = PIPE_SWIZZLE_RED;
       sv_templ.swizzle_b = PIPE_SWIZZLE_RED;
+      sv_templ.swizzle_a = PIPE_SWIZZLE_RED;
    }
 
    if (!buffer->sampler_views.y) {
-- 
cgit v1.2.3


From 794cde3f5ef59cf603be284fbc8de33d2cda7d2c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 2 Apr 2011 12:05:22 +0200
Subject: [g3dvl] split vertex element state into y, cb, cr

---
 src/gallium/auxiliary/vl/vl_idct.c               |  30 +++--
 src/gallium/auxiliary/vl/vl_mpeg12_context.c     | 118 +++++++-----------
 src/gallium/auxiliary/vl/vl_mpeg12_context.h     |   6 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 151 ++++++++++-------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   5 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     |  79 ++++++------
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     |  16 ++-
 7 files changed, 173 insertions(+), 232 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 8cfb56ea416..c73b4765212 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -93,8 +93,8 @@ static void *
 create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
 {
    struct ureg_program *shader;
-   struct ureg_src vrect, vpos, vblock, eb[4];
-   struct ureg_src scale, blocks_xy, t_eb;
+   struct ureg_src vrect, vpos, vblock, eb;
+   struct ureg_src scale, blocks_xy;
    struct ureg_dst t_tex, t_start;
    struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
    unsigned label;
@@ -112,10 +112,7 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
 
-   eb[0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0);
-   eb[1] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
-   eb[2] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
-   eb[3] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
+   eb = ureg_DECL_vs_input(shader, VS_I_EB);
 
    o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
    o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
@@ -127,8 +124,7 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
     * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
     * blocks_xy = (blocks_x, blocks_y)
     *
-    * ar = vblock.y * blocks.x + vblock.x
-    * if eb[ar].(color_swizzle)
+    * if eb.(vblock.y, vblock.x)
     *    o_vpos.xy = -1
     * else
     *    t_tex = vpos * blocks_xy + vblock
@@ -150,18 +146,20 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
    blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y);
 
    if (idct->blocks_x > 1 || idct->blocks_y > 1) {
-      struct ureg_dst ar = ureg_DECL_address(shader);
+      ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY),
+         ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_Y)),
+         ureg_swizzle(eb, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W),
+         ureg_swizzle(eb, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y));
 
-      ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
-               ureg_scalar(vblock, TGSI_SWIZZLE_Y), blocks_xy, vblock);
+      ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
+         ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_X)),
+         ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_Y),
+         ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X));
 
-      ureg_ARL(shader, ureg_writemask(ar, TGSI_WRITEMASK_X), ureg_src(t_tex));
-      t_eb = ureg_src_indirect(eb[0], ureg_src(ar));
-   } else {
-      t_eb = eb[0];
+      eb = ureg_src(t_tex);
    }
 
-   ureg_IF(shader, ureg_scalar(t_eb, color_swizzle), &label);
+   ureg_IF(shader, ureg_scalar(eb, TGSI_SWIZZLE_X), &label);
 
       ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f));
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 94ac70e2126..9f3da7381ba 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -94,8 +94,8 @@ vl_mpeg12_buffer_destroy(struct pipe_video_buffer *buffer)
    vl_ycbcr_buffer_cleanup(&buf->render_result);
    vl_vb_cleanup(&buf->vertex_stream);
    vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y);
-   vl_idct_cleanup_buffer(&ctx->idct_cb, &buf->idct_cb);
-   vl_idct_cleanup_buffer(&ctx->idct_cr, &buf->idct_cr);
+   vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct_cb);
+   vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct_cr);
    vl_mpeg12_mc_cleanup_buffer(&buf->mc_y);
    vl_mpeg12_mc_cleanup_buffer(&buf->mc_cb);
    vl_mpeg12_mc_cleanup_buffer(&buf->mc_cr);
@@ -115,8 +115,8 @@ vl_mpeg12_buffer_map(struct pipe_video_buffer *buffer)
 
    vl_vb_map(&buf->vertex_stream, ctx->pipe);
    vl_idct_map_buffers(&ctx->idct_y, &buf->idct_y);
-   vl_idct_map_buffers(&ctx->idct_cr, &buf->idct_cr);
-   vl_idct_map_buffers(&ctx->idct_cb, &buf->idct_cb);
+   vl_idct_map_buffers(&ctx->idct_c, &buf->idct_cb);
+   vl_idct_map_buffers(&ctx->idct_c, &buf->idct_cr);
 }
 
 static void
@@ -156,8 +156,8 @@ vl_mpeg12_buffer_unmap(struct pipe_video_buffer *buffer)
 
    vl_vb_unmap(&buf->vertex_stream, ctx->pipe);
    vl_idct_unmap_buffers(&ctx->idct_y, &buf->idct_y);
-   vl_idct_unmap_buffers(&ctx->idct_cr, &buf->idct_cr);
-   vl_idct_unmap_buffers(&ctx->idct_cb, &buf->idct_cb);
+   vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct_cb);
+   vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct_cr);
 }
 
 static void
@@ -182,36 +182,42 @@ vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
    ctx = (struct vl_mpeg12_context *)buf->base.context;
    assert(ctx);
 
+   surfaces = vl_ycbcr_get_surfaces(&buf->render_result);
+
+   sv_past = past ? vl_ycbcr_get_sampler_views(&past->render_result) : NULL;
+   sv_future = future ? vl_ycbcr_get_sampler_views(&future->render_result) : NULL;
+
    vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
 
    ctx->pipe->set_vertex_buffers(ctx->pipe, 2, buf->vertex_bufs.all);
-   ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
    ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
-   vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num);
-   vl_idct_flush(&ctx->idct_cr, &buf->idct_cr, ne_num);
-   vl_idct_flush(&ctx->idct_cb, &buf->idct_cb, ne_num);
 
-   surfaces = vl_ycbcr_get_surfaces(&buf->render_result);
 
-   sv_past = past ? vl_ycbcr_get_sampler_views(&past->render_result) : NULL;
-   sv_future = future ? vl_ycbcr_get_sampler_views(&future->render_result) : NULL;
+   ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_y);
+   vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num);
 
    sv_refs[0] = sv_past ? sv_past->y : NULL;
    sv_refs[1] = sv_future ? sv_future->y : NULL;
 
-   vl_mpeg12_mc_renderer_flush(&ctx->mc_y, &buf->mc_y, surfaces->y,
+   vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_y, surfaces->y,
                                sv_refs, ne_start, ne_num, e_start, e_num, fence);
 
+   ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_cb);
+   vl_idct_flush(&ctx->idct_c, &buf->idct_cb, ne_num);
+
    sv_refs[0] = sv_past ? sv_past->cb : NULL;
    sv_refs[1] = sv_future ? sv_future->cb : NULL;
 
-   vl_mpeg12_mc_renderer_flush(&ctx->mc_cb, &buf->mc_cb, surfaces->cb,
+   vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_cb, surfaces->cb,
                                sv_refs, ne_start, ne_num, e_start, e_num, fence);
 
+   ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_cr);
+   vl_idct_flush(&ctx->idct_c, &buf->idct_cr, ne_num);
+
    sv_refs[0] = sv_past ? sv_past->cr : NULL;
    sv_refs[1] = sv_future ? sv_future->cr : NULL;
 
-   vl_mpeg12_mc_renderer_flush(&ctx->mc_cr, &buf->mc_cr, surfaces->cr,
+   vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_cr, surfaces->cr,
                                sv_refs, ne_start, ne_num, e_start, e_num, fence);
 }
 
@@ -231,13 +237,12 @@ vl_mpeg12_destroy(struct pipe_video_context *vpipe)
    ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
 
    vl_compositor_cleanup(&ctx->compositor);
-   vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y);
-   vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb);
-   vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr);
+   vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
    vl_idct_cleanup(&ctx->idct_y);
-   vl_idct_cleanup(&ctx->idct_cr);
-   vl_idct_cleanup(&ctx->idct_cb);
-   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->vertex_elems_state);
+   vl_idct_cleanup(&ctx->idct_c);
+   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_y);
+   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_cb);
+   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_cr);
    pipe_resource_reference(&ctx->quads.buffer, NULL);
    ctx->pipe->destroy(ctx->pipe);
 
@@ -353,13 +358,13 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
       return NULL;
    }
 
-   if (!vl_idct_init_buffer(&ctx->idct_cb, &buffer->idct_cb,
+   if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct_cb,
                             idct_views->cb, idct_surfaces->cb)) {
       FREE(buffer);
       return NULL;
    }
 
-   if (!vl_idct_init_buffer(&ctx->idct_cr, &buffer->idct_cr,
+   if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct_cr,
                             idct_views->cr, idct_surfaces->cr)) {
       FREE(buffer);
       return NULL;
@@ -367,17 +372,17 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
 
    mc_views = vl_ycbcr_get_sampler_views(&buffer->idct_2_mc);
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc_y, &buffer->mc_y, mc_views->y)) {
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_y, mc_views->y)) {
       FREE(buffer);
       return NULL;
    }
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc_cb, &buffer->mc_cb, mc_views->cb)) {
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cb, mc_views->cb)) {
       FREE(buffer);
       return NULL;
    }
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc_cr, &buffer->mc_cr, mc_views->cr)) {
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cr, mc_views->cr)) {
       FREE(buffer);
       return NULL;
    }
@@ -637,14 +642,10 @@ init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_
       chroma_blocks_y = 2;
    }
 
-   if(!vl_idct_init(&ctx->idct_cb, ctx->pipe, chroma_width, chroma_height,
+   if(!vl_idct_init(&ctx->idct_c, ctx->pipe, chroma_width, chroma_height,
                     chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
       return false;
 
-   if(!vl_idct_init(&ctx->idct_cr, ctx->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Z, idct_matrix))
-      return false;
-
    return true;
 }
 
@@ -690,13 +691,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
 
    ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2);
    ctx->vertex_buffer_size = width / MACROBLOCK_WIDTH * height / MACROBLOCK_HEIGHT;
-   ctx->vertex_elems_state = vl_vb_get_elems_state(ctx->pipe, true);
-
-   if (ctx->vertex_elems_state == NULL) {
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
+   ctx->ves_y = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_X);
+   ctx->ves_cb = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Y);
+   ctx->ves_cr = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Z);
 
    ctx->buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
    ctx->buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
@@ -707,34 +704,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
       return NULL;
    }
 
-   if (!vl_mpeg12_mc_renderer_init(&ctx->mc_y, ctx->pipe,
-                                   ctx->buffer_width, ctx->buffer_height,
-                                   chroma_format, TGSI_SWIZZLE_X)) {
-      vl_idct_cleanup(&ctx->idct_y);
-      vl_idct_cleanup(&ctx->idct_cr);
-      vl_idct_cleanup(&ctx->idct_cb);
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
-
-   if (!vl_mpeg12_mc_renderer_init(&ctx->mc_cb, ctx->pipe,
-                                   ctx->buffer_width, ctx->buffer_height,
-                                   chroma_format, TGSI_SWIZZLE_Y)) {
-      vl_idct_cleanup(&ctx->idct_y);
-      vl_idct_cleanup(&ctx->idct_cr);
-      vl_idct_cleanup(&ctx->idct_cb);
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
-
-   if (!vl_mpeg12_mc_renderer_init(&ctx->mc_cr, ctx->pipe,
-                                   ctx->buffer_width, ctx->buffer_height,
-                                   chroma_format, TGSI_SWIZZLE_Z)) {
+   if (!vl_mpeg12_mc_renderer_init(&ctx->mc, ctx->pipe, ctx->buffer_width, ctx->buffer_height)) {
       vl_idct_cleanup(&ctx->idct_y);
-      vl_idct_cleanup(&ctx->idct_cr);
-      vl_idct_cleanup(&ctx->idct_cb);
+      vl_idct_cleanup(&ctx->idct_c);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
@@ -742,11 +714,8 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
 
    if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) {
       vl_idct_cleanup(&ctx->idct_y);
-      vl_idct_cleanup(&ctx->idct_cr);
-      vl_idct_cleanup(&ctx->idct_cb);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr);
+      vl_idct_cleanup(&ctx->idct_c);
+      vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
@@ -754,11 +723,8 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
 
    if (!init_pipe_state(ctx)) {
       vl_idct_cleanup(&ctx->idct_y);
-      vl_idct_cleanup(&ctx->idct_cr);
-      vl_idct_cleanup(&ctx->idct_cb);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_y);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cb);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc_cr);
+      vl_idct_cleanup(&ctx->idct_c);
+      vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
       vl_compositor_cleanup(&ctx->compositor);
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index f84e23508ed..e0c6ca94c45 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -49,10 +49,10 @@ struct vl_mpeg12_context
 
    struct pipe_vertex_buffer quads;
    unsigned vertex_buffer_size;
-   void *vertex_elems_state;
+   void *ves_y, *ves_cb, *ves_cr;
 
-   struct vl_idct idct_y, idct_cb, idct_cr;
-   struct vl_mpeg12_mc_renderer mc_y, mc_cb, mc_cr;
+   struct vl_idct idct_y, idct_c;
+   struct vl_mpeg12_mc_renderer mc;
    struct vl_compositor compositor;
 
    void *rast;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 7c3fea557ab..912dea3c57f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -44,16 +44,12 @@ enum VS_OUTPUT
 {
    VS_O_VPOS,
    VS_O_LINE,
-   VS_O_TEX0,
-   VS_O_TEX1,
-   VS_O_TEX2,
-   VS_O_EB_0,
-   VS_O_EB_1,
-   VS_O_INFO,
-   VS_O_MV0,
-   VS_O_MV1,
-   VS_O_MV2,
-   VS_O_MV3
+   VS_O_TEX_TOP,
+   VS_O_TEX_BOTTOM,
+   VS_O_MV0_TOP,
+   VS_O_MV0_BOTTOM,
+   VS_O_MV1_TOP,
+   VS_O_MV1_BOTTOM
 };
 
 static void *
@@ -61,10 +57,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
    struct ureg_src block_scale, mv_scale;
-   struct ureg_src vrect, vpos, eb[2][2], vmv[4];
+   struct ureg_src vrect, vpos, eb, flags, vmv[2][2];
    struct ureg_dst t_vpos, t_vtex, t_vmv;
-   struct ureg_dst o_vpos, o_line, o_vtex[3], o_eb[2], o_vmv[4], o_info;
-   unsigned i, label;
+   struct ureg_dst o_vpos, o_line, o_vtex[2], o_vmv[2][2];
+   unsigned i, j, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
@@ -76,24 +72,21 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
-   eb[0][0] = ureg_DECL_vs_input(shader, VS_I_EB_0_0);
-   eb[1][0] = ureg_DECL_vs_input(shader, VS_I_EB_1_0);
-   eb[0][1] = ureg_DECL_vs_input(shader, VS_I_EB_0_1);
-   eb[1][1] = ureg_DECL_vs_input(shader, VS_I_EB_1_1);
+   eb = ureg_DECL_vs_input(shader, VS_I_EB);
+   flags = ureg_DECL_vs_input(shader, VS_I_FLAGS);
+   vmv[0][0] = ureg_DECL_vs_input(shader, VS_I_MV0_TOP);
+   vmv[0][1] = ureg_DECL_vs_input(shader, VS_I_MV0_BOTTOM);
+   vmv[1][0] = ureg_DECL_vs_input(shader, VS_I_MV1_TOP);
+   vmv[1][1] = ureg_DECL_vs_input(shader, VS_I_MV1_BOTTOM);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
    o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
-   o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0);
-   o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1);
-   o_vtex[2] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2);
-   o_eb[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0);
-   o_eb[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1);
-   o_info = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO);
-
-   for (i = 0; i < 4; ++i) {
-     vmv[i] = ureg_DECL_vs_input(shader, VS_I_MV0 + i);
-     o_vmv[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i);
-   }
+   o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_TOP);
+   o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_BOTTOM);
+   o_vmv[0][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_TOP);
+   o_vmv[0][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_BOTTOM);
+   o_vmv[1][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_TOP);
+   o_vmv[1][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_BOTTOM);
 
    /*
     * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
@@ -143,31 +136,35 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
-   ureg_CMP(shader, ureg_writemask(o_eb[0], TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
-            eb[0][1], eb[0][0]);
-   ureg_CMP(shader, ureg_writemask(o_eb[1], TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
-            eb[1][1], eb[1][0]);
-
-   ureg_MOV(shader, ureg_writemask(o_info, TGSI_WRITEMASK_X),
-            ureg_scalar(eb[1][0], TGSI_SWIZZLE_W));
-   ureg_MUL(shader, ureg_writemask(o_info, TGSI_WRITEMASK_Y),
-            ureg_scalar(eb[1][1], TGSI_SWIZZLE_W),
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_Z),
+            ureg_scalar(flags, TGSI_SWIZZLE_W),
             ureg_imm1f(shader, 0.5f));
 
-   for (i = 0; i < 4; ++i)
-      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
+   for (i = 0; i < 2; ++i)
+      for (j = 0; j < 2; ++j) {
+         ureg_MAD(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_XY), mv_scale, vmv[i][j], ureg_src(t_vpos));
+         ureg_MOV(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_Z), ureg_src(t_vpos));
+      }
 
    ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_CMP(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_Z),
+            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
+            ureg_scalar(eb, TGSI_SWIZZLE_Y),
+            ureg_scalar(eb, TGSI_SWIZZLE_X));
+
    ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   ureg_MOV(shader, ureg_writemask(o_vtex[2], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_CMP(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_Z),
+            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
+            ureg_scalar(eb, TGSI_SWIZZLE_W),
+            ureg_scalar(eb, TGSI_SWIZZLE_Z));
 
    ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_scalar(vrect, TGSI_SWIZZLE_Y));
    ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
       vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
+   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Z),
+            ureg_scalar(flags, TGSI_SWIZZLE_Z));
 
-   ureg_IF(shader, ureg_scalar(eb[0][0], TGSI_SWIZZLE_W), &label);
+   ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_X), &label);
 
       ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
       ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
@@ -206,37 +203,31 @@ calc_field(struct ureg_program *shader)
     * line.x going from 0 to 1 if not interlaced
     * line.x going from 0 to 8 in steps of 0.5 if interlaced
     * line.y going from 0 to 8 in steps of 0.5
+    * line.z is flag for non-intra macroblocks (reference frames are fetched when set)
     *
     * tmp.xy = fraction(line)
     * tmp.xy = tmp.xy >= 0.5 ? 1 : 0
     */
    ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line);
    ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+   ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), line);
 
    return tmp;
 }
 
 static struct ureg_dst
-fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field, unsigned color_swizzle)
+fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src tc[3], sampler, eb[2];
-   struct ureg_dst texel, t_tc, t_eb_info;
+   struct ureg_src tc[2], sampler;
+   struct ureg_dst texel, t_tc;
    unsigned label;
 
    texel = ureg_DECL_temporary(shader);
    t_tc = ureg_DECL_temporary(shader);
-   t_eb_info = ureg_DECL_temporary(shader);
 
-   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX0, TGSI_INTERPOLATE_LINEAR);
-   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX1, TGSI_INTERPOLATE_LINEAR);
-   tc[2] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX2, TGSI_INTERPOLATE_LINEAR);
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_TOP, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_BOTTOM, TGSI_INTERPOLATE_LINEAR);
 
-   eb[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_0, TGSI_INTERPOLATE_CONSTANT);
-   eb[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_EB_1, TGSI_INTERPOLATE_CONSTANT);
-
-   //for (i = 0; i < 3; ++i)  {
-   //   sampler[i] = ureg_DECL_sampler(shader, i);
-   //}
    sampler = ureg_DECL_sampler(shader, 0);
 
    /*
@@ -245,31 +236,21 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
     * texel.cr = tex(tc[2], sampler[2])
     */
 
-   ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XY),
+   ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XYZ),
             ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)),
             tc[1], tc[0]);
 
-   ureg_CMP(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)),
-            eb[1], eb[0]);
-
-   /* r600g is ignoring TGSI_INTERPOLATE_CONSTANT, just workaround this */
-   ureg_SLT(shader, ureg_writemask(t_eb_info, TGSI_WRITEMASK_XYZ), ureg_src(t_eb_info), ureg_imm1f(shader, 0.5f));
+   ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f));
 
    ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.0f));
-   ureg_IF(shader, ureg_scalar(ureg_src(t_eb_info), color_swizzle), &label);
+   ureg_IF(shader, ureg_scalar(ureg_src(t_tc), TGSI_SWIZZLE_Z), &label);
 
-      if(color_swizzle==TGSI_SWIZZLE_X || r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444) {
-         ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
-      } else {
-         ureg_TEX(shader, texel, TGSI_TEXTURE_3D, tc[2], sampler);
-      }
+      ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
 
    ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
 
    ureg_release_temporary(shader, t_tc);
-   ureg_release_temporary(shader, t_eb_info);
 
    return texel;
 }
@@ -277,15 +258,14 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 static struct ureg_dst
 fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 {
-   struct ureg_src info;
-   struct ureg_src tc[4], sampler[2];
+   struct ureg_src tc[2][2], sampler[2];
    struct ureg_dst ref[2], result;
    unsigned i, intra_label;
 
-   info = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_INFO, TGSI_INTERPOLATE_CONSTANT);
-
-   for (i = 0; i < 4; ++i)
-      tc[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0 + i, TGSI_INTERPOLATE_LINEAR);
+   tc[0][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_TOP, TGSI_INTERPOLATE_LINEAR);
+   tc[0][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_BOTTOM, TGSI_INTERPOLATE_LINEAR);
+   tc[1][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_TOP, TGSI_INTERPOLATE_LINEAR);
+   tc[1][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_BOTTOM, TGSI_INTERPOLATE_LINEAR);
 
    for (i = 0; i < 2; ++i) {
       sampler[i] = ureg_DECL_sampler(shader, i + 1);
@@ -296,7 +276,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 
    ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
 
-   ureg_IF(shader, ureg_scalar(info, TGSI_SWIZZLE_X), &intra_label);
+   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z), &intra_label);
       /*
        * if (field.z)
        *    ref[0..1] = tex(tc[0..1], sampler[0..1])
@@ -306,16 +286,16 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
        */
       ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
                ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-               tc[1], tc[0]);
+               tc[0][1], tc[0][0]);
       ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
                ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-               tc[3], tc[2]);
+               tc[1][1], tc[1][0]);
 
       ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
       ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
 
       ureg_LRP(shader, result,
-               ureg_scalar(info, TGSI_SWIZZLE_Y),
+               ureg_scalar(tc[0][0], TGSI_SWIZZLE_Z),
                ureg_src(ref[1]), ureg_src(ref[0]));
 
    ureg_fixup_label(shader, intra_label, ureg_get_instruction_number(shader));
@@ -328,7 +308,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 }
 
 static void *
-create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned color_swizzle)
+create_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
    struct ureg_dst result;
@@ -342,7 +322,7 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r, unsigned color_swizzle)
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    field = calc_field(shader);
-   texel = fetch_ycbcr(r, shader, field, color_swizzle);
+   texel = fetch_ycbcr(r, shader, field);
 
    result = fetch_ref(shader, field);
 
@@ -444,9 +424,7 @@ bool
 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            struct pipe_context *pipe,
                            unsigned buffer_width,
-                           unsigned buffer_height,
-                           enum pipe_video_chroma_format chroma_format,
-                           unsigned color_swizzle)
+                           unsigned buffer_height)
 {
    struct pipe_resource tex_templ, *tex_dummy;
    struct pipe_sampler_view sampler_view;
@@ -459,7 +437,6 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    renderer->pipe = pipe;
    renderer->buffer_width = buffer_width;
    renderer->buffer_height = buffer_height;
-   renderer->chroma_format = chroma_format;
 
    if (!init_pipe_state(renderer))
       goto error_pipe_state;
@@ -468,7 +445,7 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (!renderer->vs)
       goto error_vs_shaders;
 
-   renderer->fs = create_frag_shader(renderer, color_swizzle);
+   renderer->fs = create_frag_shader(renderer);
    if (!renderer->fs)
       goto error_fs_shaders;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 0a204d637b0..052d7d6a30f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -42,7 +42,6 @@ struct vl_mpeg12_mc_renderer
    struct pipe_context *pipe;
    unsigned buffer_width;
    unsigned buffer_height;
-   enum pipe_video_chroma_format chroma_format;
 
    struct pipe_viewport_state viewport;
    struct pipe_framebuffer_state fb_state;
@@ -72,9 +71,7 @@ struct vl_mpeg12_mc_buffer
 bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                                 struct pipe_context *pipe,
                                 unsigned picture_width,
-                                unsigned picture_height,
-                                enum pipe_video_chroma_format chroma_format,
-                                unsigned color_swizzle);
+                                unsigned picture_height);
 
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index bbac8902977..59aa1e9db75 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -33,12 +33,11 @@
 struct vl_vertex_stream
 {
    struct vertex2s pos;
-   struct {
-      int8_t y;
-      int8_t cr;
-      int8_t cb;
-      int8_t flag;
-   } eb[2][2];
+   int8_t eb[3][2][2];
+   int8_t dct_type_field;
+   int8_t mo_type_frame;
+   int8_t mb_type_intra;
+   int8_t mv_wheights;
    struct vertex2s mv[4];
 };
 
@@ -121,7 +120,10 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
    assert(elements && num_elements);
 
    for ( i = 0; i < num_elements; ++i ) {
-      elements[i].src_offset = offset;
+      if (elements[i].src_offset)
+         offset = elements[i].src_offset;
+      else
+         elements[i].src_offset = offset;
       elements[i].instance_divisor = 1;
       elements[i].vertex_buffer_index = vertex_buffer_index;
       offset += util_format_get_blocksize(elements[i].src_format);
@@ -129,37 +131,39 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
 }
 
 void *
-vl_vb_get_elems_state(struct pipe_context *pipe, bool include_mvs)
+vl_vb_get_elems_state(struct pipe_context *pipe, int component)
 {
    struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
 
-   unsigned i;
-
    memset(&vertex_elems, 0, sizeof(vertex_elems));
    vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
 
    /* Position element */
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
-   /* y, cr, cb empty block element top left block */
-   vertex_elems[VS_I_EB_0_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+   /* empty block element of selected component */
+   vertex_elems[VS_I_EB].src_offset = 4 + component * 4;
+   vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+
+   /* flags */
+   vertex_elems[VS_I_FLAGS].src_offset = 16;
+   vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
 
-   /* y, cr, cb empty block element top right block */
-   vertex_elems[VS_I_EB_0_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+   /* motion vector 0 TOP element */
+   vertex_elems[VS_I_MV0_TOP].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
-   /* y, cr, cb empty block element bottom left block */
-   vertex_elems[VS_I_EB_1_0].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+   /* motion vector 0 BOTTOM element */
+   vertex_elems[VS_I_MV0_BOTTOM].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
-   /* y, cr, cb empty block element bottom right block */
-   vertex_elems[VS_I_EB_1_1].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+   /* motion vector 1 TOP element */
+   vertex_elems[VS_I_MV1_TOP].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
-   for (i = 0; i < 4; ++i)
-      /* motion vector 0..4 element */
-      vertex_elems[VS_I_MV0 + i].src_format = PIPE_FORMAT_R16G16_SSCALED;
+   /* motion vector 1 BOTTOM element */
+   vertex_elems[VS_I_MV1_BOTTOM].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
-   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - (include_mvs ? 1 : 5), 1);
+   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1);
 
-   return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS - (include_mvs ? 0 : 4), vertex_elems);
+   return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems);
 }
 
 struct pipe_vertex_buffer
@@ -256,7 +260,7 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *
                 const unsigned (*empty_block_mask)[3][2][2])
 {
    struct vl_vertex_stream *stream;
-   unsigned i, j;
+   unsigned i, j, k;
 
    assert(buffer);
    assert(mb);
@@ -269,28 +273,29 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *
    stream->pos.x = mb->mbx;
    stream->pos.y = mb->mby;
 
-   for ( i = 0; i < 2; ++i) {
-      for ( j = 0; j < 2; ++j) {
-         stream->eb[i][j].y = !(mb->cbp & (*empty_block_mask)[0][i][j]);
-         stream->eb[i][j].cr = !(mb->cbp & (*empty_block_mask)[1][i][j]);
-         stream->eb[i][j].cb = !(mb->cbp & (*empty_block_mask)[2][i][j]);
-      }
-   }
-   stream->eb[0][0].flag = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
-   stream->eb[0][1].flag = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
-   stream->eb[1][0].flag = mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA;
+   for ( i = 0; i < 3; ++i)
+      for ( j = 0; j < 2; ++j)
+         for ( k = 0; k < 2; ++k)
+            stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]);
+
+   stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
+   stream->mo_type_frame = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
+   stream->mb_type_intra = mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA;
    switch (mb->mb_type) {
       case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
-         stream->eb[1][1].flag = 0;
+         stream->mv_wheights = 0;
          break;
 
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-         stream->eb[1][1].flag = 1;
+         stream->mv_wheights = 1;
          break;
 
       case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-         stream->eb[1][1].flag = 2;
+         stream->mv_wheights = 2;
          break;
+
+      default:
+         stream->mv_wheights = 0;
    }
 
    get_motion_vectors(mb, stream->mv);
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 88e0270c170..837d8bd53f9 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -41,14 +41,12 @@ enum VS_INPUT
 {
    VS_I_RECT,
    VS_I_VPOS,
-   VS_I_EB_0_0,
-   VS_I_EB_0_1,
-   VS_I_EB_1_0,
-   VS_I_EB_1_1,
-   VS_I_MV0,
-   VS_I_MV1,
-   VS_I_MV2,
-   VS_I_MV3,
+   VS_I_EB,
+   VS_I_FLAGS,
+   VS_I_MV0_TOP,
+   VS_I_MV0_BOTTOM,
+   VS_I_MV1_TOP,
+   VS_I_MV1_BOTTOM,
 
    NUM_VS_INPUTS
 };
@@ -67,7 +65,7 @@ struct vl_vertex_buffer
 struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe,
                                              unsigned blocks_x, unsigned blocks_y);
 
-void *vl_vb_get_elems_state(struct pipe_context *pipe, bool include_mvs);
+void *vl_vb_get_elems_state(struct pipe_context *pipe, int component);
 
 struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer,
                                      struct pipe_context *pipe,
-- 
cgit v1.2.3


From 71ee815b5cb0612906f9400f9a06ce8b7cdd51b3 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 2 Apr 2011 19:50:37 +0200
Subject: [g3dvl] remove PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT

It wasn't fully implemented anyway.
---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c  | 20 ++++++--------------
 src/gallium/auxiliary/vl/vl_mpeg12_context.h  |  4 +---
 src/gallium/drivers/nv40/nv40_video_context.c |  3 +--
 src/gallium/drivers/nvfx/nvfx_video_context.c |  3 +--
 src/gallium/drivers/r600/r600_video_context.c |  3 +--
 src/gallium/drivers/softpipe/sp_screen.c      |  3 +--
 src/gallium/include/pipe/p_video_context.h    |  3 ---
 7 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 9f3da7381ba..8ea6fd08449 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -256,17 +256,11 @@ vl_mpeg12_get_param(struct pipe_video_context *vpipe, int param)
 
    assert(vpipe);
 
-   switch (param) {
-      case PIPE_CAP_NPOT_TEXTURES:
-         return !ctx->pot_buffers;
-      case PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT:
-         return ctx->decode_format;
-      default:
-      {
-         debug_printf("vl_mpeg12_context: Unknown PIPE_CAP %d\n", param);
-         return 0;
-      }
-   }
+   if (param == PIPE_CAP_NPOT_TEXTURES)
+      return !ctx->pot_buffers;
+
+   debug_printf("vl_mpeg12_context: Unknown PIPE_CAP %d\n", param);
+   return 0;
 }
 
 static struct pipe_surface *
@@ -654,8 +648,7 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
                          enum pipe_video_profile profile,
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height,
-                         bool pot_buffers,
-                         enum pipe_format decode_format)
+                         bool pot_buffers)
 {
    struct vl_mpeg12_context *ctx;
 
@@ -686,7 +679,6 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    ctx->base.set_csc_matrix = vl_mpeg12_set_csc_matrix;
 
    ctx->pipe = pipe;
-   ctx->decode_format = decode_format;
    ctx->pot_buffers = pot_buffers;
 
    ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index e0c6ca94c45..3cc052941b2 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -41,7 +41,6 @@ struct vl_mpeg12_context
 {
    struct pipe_video_context base;
    struct pipe_context *pipe;
-   enum pipe_format decode_format;
    bool pot_buffers;
    unsigned buffer_width, buffer_height;
 
@@ -89,7 +88,6 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
                          enum pipe_video_profile profile,
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height,
-                         bool pot_buffers,
-                         enum pipe_format decode_format);
+                         bool pot_buffers);
 
 #endif /* VL_MPEG12_CONTEXT_H */
diff --git a/src/gallium/drivers/nv40/nv40_video_context.c b/src/gallium/drivers/nv40/nv40_video_context.c
index 35395e848fc..34bb7cdbdda 100644
--- a/src/gallium/drivers/nv40/nv40_video_context.c
+++ b/src/gallium/drivers/nv40/nv40_video_context.c
@@ -47,8 +47,7 @@ nv40_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
          return vl_create_mpeg12_context(pipe, profile,
                                          chroma_format,
                                          width, height,
-                                         true,
-                                         PIPE_FORMAT_XYUV);
+                                         true);
       default:
          return NULL;
    }
diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.c b/src/gallium/drivers/nvfx/nvfx_video_context.c
index 0456926c9ad..01a84f2ebc6 100644
--- a/src/gallium/drivers/nvfx/nvfx_video_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_video_context.c
@@ -47,8 +47,7 @@ nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
          return vl_create_mpeg12_context(pipe, profile,
                                          chroma_format,
                                          width, height,
-                                         true,
-                                         PIPE_FORMAT_XYUV);
+                                         true);
       default:
          return NULL;
    }
diff --git a/src/gallium/drivers/r600/r600_video_context.c b/src/gallium/drivers/r600/r600_video_context.c
index 8190c9ae612..c1b0c098245 100644
--- a/src/gallium/drivers/r600/r600_video_context.c
+++ b/src/gallium/drivers/r600/r600_video_context.c
@@ -47,8 +47,7 @@ r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
          return vl_create_mpeg12_context(pipe, profile,
                                          chroma_format,
                                          width, height,
-                                         false,
-                                         PIPE_FORMAT_XYUV);
+                                         false);
       default:
          return NULL;
    }
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 26f5e1b5740..d5936641ba5 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -307,8 +307,7 @@ sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
          return vl_create_mpeg12_context(pipe, profile,
                                          chroma_format,
                                          width, height,
-                                         true,
-                                         PIPE_FORMAT_XYUV);
+                                         true);
       default:
          return NULL;
    }
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 09e2d2702c7..7786f0a6f6f 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -34,9 +34,6 @@ extern "C" {
 
 #include <pipe/p_video_state.h>
 
-/* XXX: Move to an appropriate place */
-#define PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT 256
-
 struct pipe_screen;
 struct pipe_buffer;
 struct pipe_surface;
-- 
cgit v1.2.3


From 3e92b4fd14b731bd6984aafd220059e27948aea8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 2 Apr 2011 20:29:27 +0200
Subject: g3dvl/xvmc: fix a stupid off-by-one bug

---
 src/gallium/state_trackers/xorg/xvmc/surface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 67dc57d4344..23f97b3dac5 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -155,7 +155,7 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
 
    assert(surface);
 
-   for ( i = 0; i < 3; ++i ) {
+   for ( i = 0; i < 2; ++i ) {
       if (surface->ref_surfaces[i]) {
          XvMCSurfacePrivate *ref = surface->ref_surfaces[i]->privData;
 
-- 
cgit v1.2.3


From e6049aa0a990a630ed36a6d83e7526cc5e09bb19 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 2 Apr 2011 22:26:06 +0200
Subject: [g3dvl] rework mpeg12 context error handling

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c | 157 ++++++++++++++++-----------
 1 file changed, 93 insertions(+), 64 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 8ea6fd08449..ac59793ea41 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -315,73 +315,98 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
 
    buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, ctx->pipe,
                                                       ctx->vertex_buffer_size);
+   if (!buffer->vertex_bufs.individual.stream.buffer)
+      goto error_vertex_stream;
 
    if (!vl_ycbcr_buffer_init(&buffer->idct_source, ctx->pipe,
                              ctx->buffer_width, ctx->buffer_height,
                              ctx->base.chroma_format,
                              PIPE_FORMAT_R16G16B16A16_SNORM,
-                             PIPE_USAGE_STREAM)) {
-      FREE(buffer);
-      return NULL;
-   }
+                             PIPE_USAGE_STREAM))
+      goto error_idct_source;
 
    if (!vl_ycbcr_buffer_init(&buffer->idct_2_mc, ctx->pipe,
                              ctx->buffer_width, ctx->buffer_height,
                              ctx->base.chroma_format,
                              PIPE_FORMAT_R16_SNORM,
-                             PIPE_USAGE_STATIC)) {
-      FREE(buffer);
-      return NULL;
-   }
+                             PIPE_USAGE_STATIC))
+      goto error_idct_2_mc;
 
    if (!vl_ycbcr_buffer_init(&buffer->render_result, ctx->pipe,
                              ctx->buffer_width, ctx->buffer_height,
                              ctx->base.chroma_format,
                              PIPE_FORMAT_R8_SNORM,
-                             PIPE_USAGE_STATIC)) {
-      FREE(buffer);
-      return NULL;
-   }
+                             PIPE_USAGE_STATIC))
+      goto error_render_result;
 
    idct_views = vl_ycbcr_get_sampler_views(&buffer->idct_source);
+   if (!idct_views)
+      goto error_idct_views;
+
    idct_surfaces = vl_ycbcr_get_surfaces(&buffer->idct_2_mc);
+   if (!idct_surfaces)
+      goto error_idct_surfaces;
 
    if (!vl_idct_init_buffer(&ctx->idct_y, &buffer->idct_y,
-                            idct_views->y, idct_surfaces->y)) {
-      FREE(buffer);
-      return NULL;
-   }
+                            idct_views->y, idct_surfaces->y))
+      goto error_idct_y;
 
    if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct_cb,
-                            idct_views->cb, idct_surfaces->cb)) {
-      FREE(buffer);
-      return NULL;
-   }
+                            idct_views->cb, idct_surfaces->cb))
+      goto error_idct_cb;
 
    if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct_cr,
-                            idct_views->cr, idct_surfaces->cr)) {
-      FREE(buffer);
-      return NULL;
-   }
+                            idct_views->cr, idct_surfaces->cr))
+      goto error_idct_cr;
 
    mc_views = vl_ycbcr_get_sampler_views(&buffer->idct_2_mc);
+   if (!mc_views)
+      goto error_mc_views;
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_y, mc_views->y)) {
-      FREE(buffer);
-      return NULL;
-   }
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_y, mc_views->y))
+      goto error_mc_y;
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cb, mc_views->cb)) {
-      FREE(buffer);
-      return NULL;
-   }
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cb, mc_views->cb))
+      goto error_mc_cb;
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cr, mc_views->cr)) {
-      FREE(buffer);
-      return NULL;
-   }
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cr, mc_views->cr))
+      goto error_mc_cr;
 
    return &buffer->base;
+
+error_mc_cr:
+   vl_mpeg12_mc_cleanup_buffer(&buffer->mc_cb);
+
+error_mc_cb:
+   vl_mpeg12_mc_cleanup_buffer(&buffer->mc_y);
+
+error_mc_y:
+error_mc_views:
+   vl_idct_cleanup_buffer(&ctx->idct_c, &buffer->idct_cr);
+
+error_idct_cr:
+   vl_idct_cleanup_buffer(&ctx->idct_c, &buffer->idct_cb);
+
+error_idct_cb:
+   vl_idct_cleanup_buffer(&ctx->idct_y, &buffer->idct_y);
+
+error_idct_y:
+error_idct_surfaces:
+error_idct_views:
+   vl_ycbcr_buffer_cleanup(&buffer->render_result);
+
+error_render_result:
+   vl_ycbcr_buffer_cleanup(&buffer->idct_2_mc);
+
+error_idct_2_mc:
+   vl_ycbcr_buffer_cleanup(&buffer->idct_source);
+
+error_idct_source:
+   vl_vb_cleanup(&buffer->vertex_stream);
+
+error_vertex_stream:
+   FREE(buffer);
+   return NULL;
 }
 
 static boolean
@@ -613,11 +638,11 @@ init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_
    ctx->empty_block_mask = &const_empty_block_mask_420;
 
    if (!(idct_matrix = vl_idct_upload_matrix(ctx->pipe)))
-      return false;
+      goto error_idct_matrix;
 
    if (!vl_idct_init(&ctx->idct_y, ctx->pipe, buffer_width, buffer_height,
                      2, 2, TGSI_SWIZZLE_X, idct_matrix))
-      return false;
+      goto error_idct_y;
 
    if (ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
       chroma_width = buffer_width / 2;
@@ -638,9 +663,19 @@ init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_
 
    if(!vl_idct_init(&ctx->idct_c, ctx->pipe, chroma_width, chroma_height,
                     chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
-      return false;
+      goto error_idct_c;
 
+   pipe_sampler_view_reference(&idct_matrix, NULL);
    return true;
+
+error_idct_c:
+   vl_idct_cleanup(&ctx->idct_y);
+
+error_idct_y:
+   pipe_sampler_view_reference(&idct_matrix, NULL);
+
+error_idct_matrix:
+   return false;
 }
 
 struct pipe_video_context *
@@ -690,38 +725,32 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    ctx->buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
    ctx->buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
 
-   if (!init_idct(ctx, ctx->buffer_width, ctx->buffer_height)) {
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
+   if (!init_idct(ctx, ctx->buffer_width, ctx->buffer_height))
+      goto error_idct;
 
-   if (!vl_mpeg12_mc_renderer_init(&ctx->mc, ctx->pipe, ctx->buffer_width, ctx->buffer_height)) {
-      vl_idct_cleanup(&ctx->idct_y);
-      vl_idct_cleanup(&ctx->idct_c);
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
+   if (!vl_mpeg12_mc_renderer_init(&ctx->mc, ctx->pipe, ctx->buffer_width, ctx->buffer_height))
+      goto error_mc;
 
-   if (!vl_compositor_init(&ctx->compositor, ctx->pipe)) {
-      vl_idct_cleanup(&ctx->idct_y);
-      vl_idct_cleanup(&ctx->idct_c);
+   if (!vl_compositor_init(&ctx->compositor, ctx->pipe))
+      goto error_compositor;
+
+   if (!init_pipe_state(ctx))
+      goto error_pipe_state;
+
+   return &ctx->base;
+
+error_pipe_state:
+      vl_compositor_cleanup(&ctx->compositor);
+
+error_compositor:
       vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
-   }
 
-   if (!init_pipe_state(ctx)) {
+error_mc:
       vl_idct_cleanup(&ctx->idct_y);
       vl_idct_cleanup(&ctx->idct_c);
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
-      vl_compositor_cleanup(&ctx->compositor);
+
+error_idct:
       ctx->pipe->destroy(ctx->pipe);
       FREE(ctx);
       return NULL;
-   }
-
-   return &ctx->base;
 }
-- 
cgit v1.2.3


From 59774e5c7a2756c5c430fc74bc80ea75d54f594d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 2 Apr 2011 22:36:26 +0200
Subject: [g3dvl] remove unused color_swizzle from idct code

---
 src/gallium/auxiliary/vl/vl_idct.c           | 12 ++++++------
 src/gallium/auxiliary/vl/vl_idct.h           |  2 +-
 src/gallium/auxiliary/vl/vl_mpeg12_context.c |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index c73b4765212..6bebac1e88a 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -90,7 +90,7 @@ calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
 }
 
 static void *
-create_vert_shader(struct vl_idct *idct, bool matrix_stage, int color_swizzle)
+create_vert_shader(struct vl_idct *idct, bool matrix_stage)
 {
    struct ureg_program *shader;
    struct ureg_src vrect, vpos, vblock, eb;
@@ -358,9 +358,9 @@ create_transpose_frag_shader(struct vl_idct *idct)
 }
 
 static bool
-init_shaders(struct vl_idct *idct, int color_swizzle)
+init_shaders(struct vl_idct *idct)
 {
-   idct->matrix_vs = create_vert_shader(idct, true, color_swizzle);
+   idct->matrix_vs = create_vert_shader(idct, true);
    if (!idct->matrix_vs)
       goto error_matrix_vs;
 
@@ -368,7 +368,7 @@ init_shaders(struct vl_idct *idct, int color_swizzle)
    if (!idct->matrix_fs)
       goto error_matrix_fs;
 
-   idct->transpose_vs = create_vert_shader(idct, false, color_swizzle);
+   idct->transpose_vs = create_vert_shader(idct, false);
    if (!idct->transpose_vs)
       goto error_transpose_vs;
 
@@ -616,7 +616,7 @@ error_matrix:
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
                   unsigned blocks_x, unsigned blocks_y,
-                  int color_swizzle, struct pipe_sampler_view *matrix)
+                  struct pipe_sampler_view *matrix)
 {
    assert(idct && pipe && matrix);
 
@@ -627,7 +627,7 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
    idct->blocks_y = blocks_y;
    pipe_sampler_view_reference(&idct->matrix, matrix);
 
-   if(!init_shaders(idct, color_swizzle))
+   if(!init_shaders(idct))
       return false;
 
    if(!init_state(idct)) {
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index e0d441265ff..2d6778d7ec4 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -80,7 +80,7 @@ struct pipe_sampler_view *vl_idct_upload_matrix(struct pipe_context *pipe);
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
                   unsigned blocks_x, unsigned blocks_y,
-                  int color_swizzle, struct pipe_sampler_view *matrix);
+                  struct pipe_sampler_view *matrix);
 
 /* destroy an idct instance */
 void vl_idct_cleanup(struct vl_idct *idct);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index ac59793ea41..be598a8b55f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -641,7 +641,7 @@ init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_
       goto error_idct_matrix;
 
    if (!vl_idct_init(&ctx->idct_y, ctx->pipe, buffer_width, buffer_height,
-                     2, 2, TGSI_SWIZZLE_X, idct_matrix))
+                     2, 2, idct_matrix))
       goto error_idct_y;
 
    if (ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
@@ -662,7 +662,7 @@ init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_
    }
 
    if(!vl_idct_init(&ctx->idct_c, ctx->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, TGSI_SWIZZLE_Y, idct_matrix))
+                    chroma_blocks_x, chroma_blocks_y, idct_matrix))
       goto error_idct_c;
 
    pipe_sampler_view_reference(&idct_matrix, NULL);
-- 
cgit v1.2.3


From 7dc87676f203f1df935a97348e1fb15274be4642 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 2 Apr 2011 23:16:21 +0200
Subject: vdpau: get state tracker to compile again

---
 src/gallium/state_trackers/vdpau/decode.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 048cfe819f0..b5e22883067 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -139,7 +139,8 @@ vlVdpCreateSurfaceTarget(vlVdpDecoder *vldecoder, vlVdpSurface *vlsurf)
 
    memset(&tmplt, 0, sizeof(struct pipe_resource));
    tmplt.target = PIPE_TEXTURE_2D;
-   tmplt.format = vctx->get_param(vctx,PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT);
+   // TODO
+   //tmplt.format = vctx->get_param(vctx,PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT);
    tmplt.last_level = 0;
    tmplt.width0 = vlsurf->width;
    tmplt.height0 = vlsurf->height;
@@ -218,9 +219,10 @@ vlVdpDecoderRenderMpeg2(vlVdpDecoder *vldecoder,
       goto skip_frame;
    }
 
-   vpipe->set_decode_target(vpipe,t_surf);
-   vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks,
-                             (struct pipe_macroblock *)pipe_macroblocks, NULL);
+   // TODO
+   //vpipe->set_decode_target(vpipe,t_surf);
+   //vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks,
+   //                          (struct pipe_macroblock *)pipe_macroblocks, NULL);
 
    skip_frame:
    return ret;
-- 
cgit v1.2.3


From d5b05a869a71147987a1c1d6c84f8df17148f031 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 3 Apr 2011 00:09:01 +0200
Subject: [g3dvl] start over with vdpau decoding

---
 src/gallium/state_trackers/vdpau/decode.c        | 20 +++++++++++++++++---
 src/gallium/state_trackers/vdpau/surface.c       | 18 +++++++++---------
 src/gallium/state_trackers/vdpau/vdpau_private.h |  9 +++++----
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index b5e22883067..7905227597a 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -39,6 +39,9 @@ vlVdpDecoderCreate(VdpDevice device,
                    uint32_t max_references,
                    VdpDecoder *decoder)
 {
+   return VDP_STATUS_NO_IMPLEMENTATION;
+
+#if 0
    enum pipe_video_profile p_profile = PIPE_VIDEO_PROFILE_UNKNOWN;
    VdpStatus ret = VDP_STATUS_OK;
    vlVdpDecoder *vldecoder = NULL;
@@ -93,11 +96,15 @@ no_screen:
 no_decoder:
 inv_device:
     return ret;
+#endif
 }
 
 VdpStatus
 vlVdpDecoderDestroy(VdpDecoder decoder)
 {
+   return VDP_STATUS_NO_IMPLEMENTATION;
+
+#if 0
    debug_printf("[VDPAU] Destroying decoder\n");
    vlVdpDecoder *vldecoder;
 
@@ -117,8 +124,10 @@ vlVdpDecoderDestroy(VdpDecoder decoder)
    FREE(vldecoder);
 
    return VDP_STATUS_OK;
+#endif
 }
 
+#if 0
 VdpStatus
 vlVdpCreateSurfaceTarget(vlVdpDecoder *vldecoder, vlVdpSurface *vlsurf)
 {
@@ -154,12 +163,12 @@ vlVdpCreateSurfaceTarget(vlVdpDecoder *vldecoder, vlVdpSurface *vlsurf)
    memset(&surf_template, 0, sizeof(surf_template));
    surf_template.format = surf_tex->format;
    surf_template.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   vlsurf->psurface = vctx->create_surface(vctx->screen, surf_tex, &surf_template);
+   //vlsurf->psurface = vctx->create_surface(vctx->screen, surf_tex, &surf_template);
 
    pipe_resource_reference(&surf_tex, NULL);
 
-   if (!vlsurf->psurface)
-      return VDP_STATUS_RESOURCES;
+   //if (!vlsurf->psurface)
+   //   return VDP_STATUS_RESOURCES;
    debug_printf("[VDPAU] Done creating surface\n");
 
    return VDP_STATUS_OK;
@@ -227,6 +236,7 @@ vlVdpDecoderRenderMpeg2(vlVdpDecoder *vldecoder,
    skip_frame:
    return ret;
 }
+#endif
 
 VdpStatus
 vlVdpDecoderRender(VdpDecoder decoder,
@@ -235,6 +245,9 @@ vlVdpDecoderRender(VdpDecoder decoder,
                    uint32_t bitstream_buffer_count,
                    VdpBitstreamBuffer const *bitstream_buffers)
 {
+   return VDP_STATUS_NO_IMPLEMENTATION;
+
+#if 0
    vlVdpDecoder *vldecoder;
    vlVdpSurface *vlsurf;
    struct vl_screen *vscreen;
@@ -281,6 +294,7 @@ vlVdpDecoderRender(VdpDecoder decoder,
    assert(0);
 
    return ret;
+#endif
 }
 
 VdpStatus
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index a625ea401ba..e63e631fd83 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -77,7 +77,7 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
    return VDP_STATUS_OK;
 
 no_handle:
-   FREE(p_surf->psurface);
+   //FREE(p_surf->psurface);
 inv_device:
 no_surf:
    FREE(p_surf);
@@ -97,12 +97,12 @@ vlVdpVideoSurfaceDestroy(VdpVideoSurface surface)
    if (!p_surf)
       return VDP_STATUS_INVALID_HANDLE;
 
-   if (p_surf->psurface) {
-      if (p_surf->psurface->texture) {
-         if (p_surf->psurface->texture->screen)
-            p_surf->psurface->context->surface_destroy(p_surf->psurface->context, p_surf->psurface);
-      }
-   }
+   //if (p_surf->psurface) {
+   //   if (p_surf->psurface->texture) {
+   //      if (p_surf->psurface->texture->screen)
+   //         p_surf->psurface->context->surface_destroy(p_surf->psurface->context, p_surf->psurface);
+   //   }
+   //}
    FREE(p_surf);
    return VDP_STATUS_OK;
 }
@@ -142,8 +142,8 @@ vlVdpVideoSurfaceGetBitsYCbCr(VdpVideoSurface surface,
    if (!p_surf)
       return VDP_STATUS_INVALID_HANDLE;
 
-   if (!p_surf->psurface)
-      return VDP_STATUS_RESOURCES;
+   //if (!p_surf->psurface)
+   //   return VDP_STATUS_RESOURCES;
 
    return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index d04c517733d..707bb56b88b 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -187,12 +187,13 @@ typedef struct
 typedef struct
 {
    vlVdpDevice *device;
+   enum pipe_video_chroma_format chroma_format;
    uint32_t width;
    uint32_t height;
-   uint32_t pitch;
-   struct pipe_surface *psurface;
-   enum pipe_video_chroma_format chroma_format;
-   uint8_t *data;
+
+   //uint32_t pitch;
+   //struct pipe_surface *psurface;
+   //uint8_t *data;
 } vlVdpSurface;
 
 typedef struct
-- 
cgit v1.2.3


From e6d41e4d0326cac60b49e149d86d0f95c2518045 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 3 Apr 2011 00:14:38 +0200
Subject: [g3dvl] start to clean up the mess and provide at least basic
 functionality

---
 src/gallium/state_trackers/vdpau/surface.c       |  4 ++--
 src/gallium/state_trackers/vdpau/vdpau_private.h | 18 ++++++++++++------
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index e63e631fd83..aa8f90d576a 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -63,7 +63,7 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
       goto inv_device;
    }
 
-   p_surf->chroma_format = TypeToPipe(chroma_type);
+   p_surf->chroma_format = ChromaToPipe(chroma_type);
    p_surf->device = dev;
    p_surf->width = width;
    p_surf->height = height;
@@ -124,7 +124,7 @@ vlVdpVideoSurfaceGetParameters(VdpVideoSurface surface,
 
    *width = p_surf->width;
    *height = p_surf->height;
-   *chroma_type = PipeToType(p_surf->chroma_format);
+   *chroma_type = PipeToChroma(p_surf->chroma_format);
 
    return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 707bb56b88b..c5503ea554e 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -41,7 +41,8 @@
 #define INFORMATION_STRING TOSTRING(INFORMATION)
 #define VL_HANDLES
 
-static enum pipe_video_chroma_format TypeToPipe(VdpChromaType vdpau_type)
+static inline enum pipe_video_chroma_format
+ChromaToPipe(VdpChromaType vdpau_type)
 {
    switch (vdpau_type) {
       case VDP_CHROMA_TYPE_420:
@@ -57,7 +58,8 @@ static enum pipe_video_chroma_format TypeToPipe(VdpChromaType vdpau_type)
    return -1;
 }
 
-static VdpChromaType PipeToType(enum pipe_video_chroma_format pipe_type)
+static inline VdpChromaType
+PipeToChroma(enum pipe_video_chroma_format pipe_type)
 {
    switch (pipe_type) {
       case PIPE_VIDEO_CHROMA_FORMAT_420:
@@ -74,7 +76,8 @@ static VdpChromaType PipeToType(enum pipe_video_chroma_format pipe_type)
 }
 
 
-static enum pipe_format FormatToPipe(VdpYCbCrFormat vdpau_format)
+static inline enum pipe_format
+FormatToPipe(VdpYCbCrFormat vdpau_format)
 {
    switch (vdpau_format) {
       case VDP_YCBCR_FORMAT_NV12:
@@ -96,7 +99,8 @@ static enum pipe_format FormatToPipe(VdpYCbCrFormat vdpau_format)
    return -1;
 }
 
-static enum pipe_format FormatRGBAToPipe(VdpRGBAFormat vdpau_format)
+static inline enum pipe_format
+FormatRGBAToPipe(VdpRGBAFormat vdpau_format)
 {
    switch (vdpau_format) {
       case VDP_RGBA_FORMAT_A8:
@@ -116,7 +120,8 @@ static enum pipe_format FormatRGBAToPipe(VdpRGBAFormat vdpau_format)
    return -1;
 }
 
-static VdpYCbCrFormat PipeToFormat(enum pipe_format p_format)
+static inline VdpYCbCrFormat
+PipeToFormat(enum pipe_format p_format)
 {
    switch (p_format) {
       case PIPE_FORMAT_NV12:
@@ -138,7 +143,8 @@ static VdpYCbCrFormat PipeToFormat(enum pipe_format p_format)
    return -1;
 }
 
-static enum pipe_video_profile ProfileToPipe(VdpDecoderProfile vdpau_profile)
+static inline enum pipe_video_profile
+ProfileToPipe(VdpDecoderProfile vdpau_profile)
 {
    switch (vdpau_profile) {
       case VDP_DECODER_PROFILE_MPEG1:
-- 
cgit v1.2.3


From e5f78a74f8294ee02015552db664dae1e7da9f47 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 3 Apr 2011 16:09:23 +0200
Subject: [g3dvl] split compositor out of video context

Also redesign the compositor a bit and make the result a publicly available interface
---
 src/gallium/auxiliary/vl/vl_compositor.c           | 556 ++++++++++-----------
 src/gallium/auxiliary/vl/vl_compositor.h           |  63 +--
 src/gallium/auxiliary/vl/vl_mpeg12_context.c       |  90 ++--
 src/gallium/auxiliary/vl/vl_mpeg12_context.h       |   1 -
 src/gallium/include/pipe/p_video_context.h         | 106 ++--
 src/gallium/state_trackers/xorg/xvmc/context.c     |  12 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c     |  21 +-
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   2 +
 8 files changed, 423 insertions(+), 428 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index f62706e5066..45e9cea9f66 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -36,38 +36,11 @@
 #include <util/u_sampler.h>
 #include <tgsi/tgsi_ureg.h>
 #include "vl_csc.h"
+#include "vl_types.h"
 
-struct vertex_shader_consts
-{
-   struct vertex4f dst_scale;
-   struct vertex4f dst_trans;
-   struct vertex4f src_scale;
-   struct vertex4f src_trans;
-};
-
-struct fragment_shader_consts
-{
-   float matrix[16];
-};
-
-static bool
-u_video_rects_equal(struct pipe_video_rect *a, struct pipe_video_rect *b)
-{
-   assert(a && b);
-
-   if (a->x != b->x)
-      return false;
-   if (a->y != b->y)
-      return false;
-   if (a->w != b->w)
-      return false;
-   if (a->h != b->h)
-      return false;
-
-   return true;
-}
+typedef float csc_matrix[16];
 
-static bool
+static void *
 create_vert_shader(struct vl_compositor *c)
 {
    struct ureg_program *shader;
@@ -92,15 +65,11 @@ create_vert_shader(struct vl_compositor *c)
 
    ureg_END(shader);
 
-   c->vertex_shader = ureg_create_shader_and_destroy(shader, c->pipe);
-   if (!c->vertex_shader)
-      return false;
-
-   return true;
+   return ureg_create_shader_and_destroy(shader, c->pipe);
 }
 
-static bool
-create_frag_shader_ycbcr_2_rgb(struct vl_compositor *c)
+static void *
+create_frag_shader_video_buffer(struct vl_compositor *c)
 {
    struct ureg_program *shader;
    struct ureg_src tc;
@@ -139,15 +108,11 @@ create_frag_shader_ycbcr_2_rgb(struct vl_compositor *c)
    ureg_release_temporary(shader, texel);
    ureg_END(shader);
 
-   c->fragment_shader.ycbcr_2_rgb = ureg_create_shader_and_destroy(shader, c->pipe);
-   if (!c->fragment_shader.ycbcr_2_rgb)
-      return false;
-
-   return true;
+   return ureg_create_shader_and_destroy(shader, c->pipe);
 }
 
-static bool
-create_frag_shader_palette_2_rgb(struct vl_compositor *c)
+static void *
+create_frag_shader_palette(struct vl_compositor *c)
 {
    struct ureg_program *shader;
    struct ureg_src tc;
@@ -167,7 +132,9 @@ create_frag_shader_palette_2_rgb(struct vl_compositor *c)
    texel = ureg_DECL_temporary(shader);
 
    /*
-    * fragment = tex(tc, sampler)
+    * texel = tex(tc, sampler)
+    * fragment.xyz = tex(texel, palette)
+    * fragment.a = texel.a
     */
    ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
    ureg_TEX(shader, fragment, TGSI_TEXTURE_1D, ureg_src(texel), palette);
@@ -176,15 +143,11 @@ create_frag_shader_palette_2_rgb(struct vl_compositor *c)
    ureg_release_temporary(shader, texel);
    ureg_END(shader);
 
-   c->fragment_shader.palette_2_rgb = ureg_create_shader_and_destroy(shader, c->pipe);
-   if (!c->fragment_shader.palette_2_rgb)
-      return false;
-
-   return true;
+   return ureg_create_shader_and_destroy(shader, c->pipe);
 }
 
-static bool
-create_frag_shader_rgb_2_rgb(struct vl_compositor *c)
+static void *
+create_frag_shader_rgba(struct vl_compositor *c)
 {
    struct ureg_program *shader;
    struct ureg_src tc;
@@ -205,13 +168,51 @@ create_frag_shader_rgb_2_rgb(struct vl_compositor *c)
    ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc, sampler);
    ureg_END(shader);
 
-   c->fragment_shader.rgb_2_rgb = ureg_create_shader_and_destroy(shader, c->pipe);
-   if (!c->fragment_shader.rgb_2_rgb)
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+static bool
+init_shaders(struct vl_compositor *c)
+{
+   assert(c);
+
+   c->vs = create_vert_shader(c);
+   if (!c->vs) {
+      debug_printf("Unable to create vertex shader.\n");
       return false;
+   }
+
+   c->fs_video_buffer = create_frag_shader_video_buffer(c);
+   if (!c->fs_video_buffer) {
+      debug_printf("Unable to create YCbCr-to-RGB fragment shader.\n");
+      return false;
+   }
+
+   c->fs_palette = create_frag_shader_palette(c);
+   if (!c->fs_palette) {
+      debug_printf("Unable to create Palette-to-RGB fragment shader.\n");
+      return false;
+   }
+
+   c->fs_rgba = create_frag_shader_rgba(c);
+   if (!c->fs_rgba) {
+      debug_printf("Unable to create RGB-to-RGB fragment shader.\n");
+      return false;
+   }
 
    return true;
 }
 
+static void cleanup_shaders(struct vl_compositor *c)
+{
+   assert(c);
+
+   c->pipe->delete_vs_state(c->pipe, c->vs);
+   c->pipe->delete_fs_state(c->pipe, c->fs_video_buffer);
+   c->pipe->delete_fs_state(c->pipe, c->fs_palette);
+   c->pipe->delete_fs_state(c->pipe, c->fs_rgba);
+}
+
 static bool
 init_pipe_state(struct vl_compositor *c)
 {
@@ -223,6 +224,13 @@ init_pipe_state(struct vl_compositor *c)
    c->fb_state.nr_cbufs = 1;
    c->fb_state.zsbuf = NULL;
 
+   c->viewport.scale[2] = 1;
+   c->viewport.scale[3] = 1;
+   c->viewport.translate[0] = 0;
+   c->viewport.translate[1] = 0;
+   c->viewport.translate[2] = 0;
+   c->viewport.translate[3] = 0;
+
    memset(&sampler, 0, sizeof(sampler));
    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
@@ -266,45 +274,9 @@ static void cleanup_pipe_state(struct vl_compositor *c)
    c->pipe->delete_blend_state(c->pipe, c->blend);
 }
 
-static bool
-init_shaders(struct vl_compositor *c)
-{
-   assert(c);
-
-   if (!create_vert_shader(c)) {
-      debug_printf("Unable to create vertex shader.\n");
-      return false;
-   }
-   if (!create_frag_shader_ycbcr_2_rgb(c)) {
-      debug_printf("Unable to create YCbCr-to-RGB fragment shader.\n");
-      return false;
-   }
-   if (!create_frag_shader_palette_2_rgb(c)) {
-      debug_printf("Unable to create Palette-to-RGB fragment shader.\n");
-      return false;
-   }
-   if (!create_frag_shader_rgb_2_rgb(c)) {
-      debug_printf("Unable to create RGB-to-RGB fragment shader.\n");
-      return false;
-   }
-
-   return true;
-}
-
-static void cleanup_shaders(struct vl_compositor *c)
-{
-   assert(c);
-
-   c->pipe->delete_vs_state(c->pipe, c->vertex_shader);
-   c->pipe->delete_fs_state(c->pipe, c->fragment_shader.ycbcr_2_rgb);
-   c->pipe->delete_fs_state(c->pipe, c->fragment_shader.palette_2_rgb);
-   c->pipe->delete_fs_state(c->pipe, c->fragment_shader.rgb_2_rgb);
-}
-
 static bool
 init_buffers(struct vl_compositor *c)
 {
-   struct fragment_shader_consts fsc;
    struct pipe_vertex_element vertex_elems[2];
 
    assert(c);
@@ -338,18 +310,14 @@ init_buffers(struct vl_compositor *c)
     * Const buffer contains the color conversion matrix and bias vectors
     */
    /* XXX: Create with IMMUTABLE/STATIC... although it does change every once in a long while... */
-   c->fs_const_buf = pipe_buffer_create
+   c->csc_matrix = pipe_buffer_create
    (
       c->pipe->screen,
       PIPE_BIND_CONSTANT_BUFFER,
       PIPE_USAGE_STATIC,
-      sizeof(struct fragment_shader_consts)
+      sizeof(csc_matrix)
    );
 
-   vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, fsc.matrix);
-
-   vl_compositor_set_csc_matrix(c, fsc.matrix);
-
    return true;
 }
 
@@ -360,99 +328,27 @@ cleanup_buffers(struct vl_compositor *c)
 
    c->pipe->delete_vertex_elements_state(c->pipe, c->vertex_elems_state);
    pipe_resource_reference(&c->vertex_buf.buffer, NULL);
-   pipe_resource_reference(&c->fs_const_buf, NULL);
+   pipe_resource_reference(&c->csc_matrix, NULL);
 }
 
-bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe)
+static inline struct pipe_video_rect
+default_rect(struct vl_compositor_layer *layer)
 {
-   unsigned i;
-
-   assert(compositor);
-
-   memset(compositor, 0, sizeof(struct vl_compositor));
-
-   compositor->pipe = pipe;
-
-   if (!init_pipe_state(compositor))
-      return false;
-
-   if (!init_shaders(compositor)) {
-      cleanup_pipe_state(compositor);
-      return false;
-   }
-   if (!init_buffers(compositor)) {
-      cleanup_shaders(compositor);
-      cleanup_pipe_state(compositor);
-      return false;
-   }
-
-   compositor->fb_state.width = 0;
-   compositor->fb_state.height = 0;
-   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i)
-      compositor->layers[i] = NULL;
-   compositor->dirty_layers = 0;
-
-   return true;
+   struct pipe_resource *res = layer->sampler_views[0]->texture;
+   struct pipe_video_rect rect = { 0, 0, res->width0, res->height0 };
+   return rect;
 }
 
-void vl_compositor_cleanup(struct vl_compositor *compositor)
-{
-   assert(compositor);
-
-   cleanup_buffers(compositor);
-   cleanup_shaders(compositor);
-   cleanup_pipe_state(compositor);
-}
-
-void vl_compositor_set_layers(struct vl_compositor *compositor,
-                              struct pipe_sampler_view *layers[],
-                              struct pipe_sampler_view *palettes[],
-                              struct pipe_video_rect *src_rects[],
-                              struct pipe_video_rect *dst_rects[],
-                              unsigned num_layers)
-{
-   unsigned i;
-
-   assert(compositor);
-   assert(num_layers <= VL_COMPOSITOR_MAX_LAYERS);
-
-   for (i = 0; i < num_layers; ++i)
-   {
-      assert((layers[i] && src_rects[i] && dst_rects[i]) ||
-             (!layers[i] && !src_rects[i] && !dst_rects[i]));
-
-      if (compositor->layers[i] != layers[i] ||
-          compositor->palettes[i] != palettes[i] ||
-          !u_video_rects_equal(&compositor->layer_src_rects[i], src_rects[i]) ||
-          !u_video_rects_equal(&compositor->layer_dst_rects[i], dst_rects[i]))
-      {
-         pipe_sampler_view_reference(&compositor->layers[i], layers[i]);
-         pipe_sampler_view_reference(&compositor->palettes[i], palettes[i]);
-         compositor->layer_src_rects[i] = *src_rects[i];
-         compositor->layer_dst_rects[i] = *dst_rects[i];
-         compositor->dirty_layers |= 1 << i;
-      }
-
-      if (layers[i])
-         compositor->dirty_layers |= 1 << i;
-   }
-
-   for (; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
-      pipe_sampler_view_reference(&compositor->layers[i], NULL);
-      pipe_sampler_view_reference(&compositor->palettes[i], NULL);
-   }
-}
-
-static void gen_rect_verts(struct pipe_video_rect *src_rect,
-                           struct vertex2f *src_inv_size,
-                           struct pipe_video_rect *dst_rect,
-                           struct vertex2f *dst_inv_size,
-                           struct vertex4f *vb)
+static void
+gen_rect_verts(struct vertex4f *vb,
+               struct pipe_video_rect *src_rect,
+               struct vertex2f *src_inv_size,
+               struct pipe_video_rect *dst_rect,
+               struct vertex2f *dst_inv_size)
 {
-   assert(src_rect);
-   assert(src_inv_size);
-   assert((dst_rect && dst_inv_size) /*|| (!dst_rect && !dst_inv_size)*/);
    assert(vb);
+   assert(src_rect && src_inv_size);
+   assert(dst_rect && dst_inv_size);
 
    vb[0].x = dst_rect->x * dst_inv_size->x;
    vb[0].y = dst_rect->y * dst_inv_size->y;
@@ -475,162 +371,252 @@ static void gen_rect_verts(struct pipe_video_rect *src_rect,
    vb[3].w = (src_rect->y + src_rect->h) * src_inv_size->y;
 }
 
-static unsigned gen_data(struct vl_compositor *c,
-                         struct pipe_sampler_view *src_surface,
-                         struct pipe_video_rect *src_rect,
-                         struct pipe_video_rect *dst_rect,
-                         struct pipe_sampler_view *textures[VL_COMPOSITOR_MAX_LAYERS + 1][2],
-                         void **frag_shaders)
+static void
+gen_vertex_data(struct vl_compositor *c, struct pipe_video_rect *dst_rect, struct vertex2f *dst_inv_size)
 {
    struct vertex4f *vb;
    struct pipe_transfer *buf_transfer;
-   unsigned num_rects = 0;
    unsigned i;
 
    assert(c);
-   assert(src_surface);
-   assert(src_rect);
    assert(dst_rect);
-   assert(textures);
 
    vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
                         PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
                         &buf_transfer);
 
    if (!vb)
-      return 0;
-
-   {
-      struct vertex2f src_inv_size = { 1.0f / src_surface->texture->width0, 1.0f / src_surface->texture->height0};
-      gen_rect_verts(src_rect, &src_inv_size, dst_rect, &c->fb_inv_size, vb);
-      textures[num_rects][0] = src_surface;
-      textures[num_rects][1] = NULL;
-      /* XXX: Hack, sort of */
-      frag_shaders[num_rects] = c->fragment_shader.ycbcr_2_rgb;
-      ++num_rects;
-      vb += 4;
-   }
+      return;
 
-   for (i = 0; c->dirty_layers > 0; i++) {
-      assert(i < VL_COMPOSITOR_MAX_LAYERS);
+   for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; i++) {
+      if (c->used_layers & (1 << i)) {
+         struct pipe_sampler_view *sv = c->layers[i].sampler_views[0];
+         struct vertex2f src_inv_size = {1.0f / sv->texture->width0, 1.0f / sv->texture->height0};
 
-      if (c->dirty_layers & (1 << i)) {
-         struct vertex2f layer_inv_size = {1.0f / c->layers[i]->texture->width0, 1.0f / c->layers[i]->texture->height0};
-         gen_rect_verts(&c->layer_src_rects[i], &layer_inv_size, &c->layer_dst_rects[i], &layer_inv_size, vb);
-         textures[num_rects][0] = c->layers[i];
-         textures[num_rects][1] = c->palettes[i];
-
-         if (c->palettes[i])
-            frag_shaders[num_rects] = c->fragment_shader.palette_2_rgb;
+         if (c->layers[i].fs == c->fs_video_buffer) /* video layers fill the caller's dst_rect */
+            gen_rect_verts(vb, &c->layers[i].src_rect, &src_inv_size, dst_rect, dst_inv_size);
          else
-            frag_shaders[num_rects] = c->fragment_shader.rgb_2_rgb;
+            gen_rect_verts(vb, &c->layers[i].src_rect, &src_inv_size, &c->layers[i].dst_rect, &src_inv_size);
 
-         ++num_rects;
          vb += 4;
-         c->dirty_layers &= ~(1 << i);
       }
    }
 
    pipe_buffer_unmap(c->pipe, buf_transfer);
-
-   return num_rects;
 }
 
-static void draw_layers(struct vl_compositor *c,
-                        struct vl_ycbcr_sampler_views *src_sampler,
-                        struct pipe_video_rect *src_rect,
-                        struct pipe_video_rect *dst_rect)
+static void
+draw_layers(struct vl_compositor *c)
 {
-   unsigned num_rects;
-   struct pipe_sampler_view *surfaces[VL_COMPOSITOR_MAX_LAYERS + 1][2];
-   void *frag_shaders[VL_COMPOSITOR_MAX_LAYERS + 1];
-   unsigned i;
+   unsigned vb_index, i;
 
    assert(c);
-   assert(src_sampler);
-   assert(src_rect);
-   assert(dst_rect);
 
-   num_rects = gen_data(c, src_sampler->y, src_rect, dst_rect, surfaces, frag_shaders);
+   for (i = 0, vb_index = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
+      if (c->used_layers & (1 << i)) {
+         struct pipe_sampler_view **samplers = &c->layers[i].sampler_views[0];
+         unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
 
-   c->pipe->bind_blend_state(c->pipe, c->blend);
-   for (i = 0; i < num_rects; ++i) {
-      c->pipe->bind_fs_state(c->pipe, frag_shaders[i]);
-      if (i == 0) {
-         c->pipe->set_fragment_sampler_views(c->pipe, 3, &src_sampler->y);
-      } else {
-         c->pipe->set_fragment_sampler_views(c->pipe, surfaces[i][1] ? 2 : 1, &surfaces[i][0]);
+         c->pipe->bind_fs_state(c->pipe, c->layers[i].fs);
+         c->pipe->set_fragment_sampler_views(c->pipe, num_sampler_views, samplers);
+         util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, vb_index * 4, 4);
+         vb_index++;
       }
-
-      util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, i * 4, 4);
    }
 }
 
-void vl_compositor_render(struct vl_compositor          *compositor,
-                          struct vl_ycbcr_sampler_views *src_sampler,
-                          struct pipe_video_rect        *src_area,
-                          struct pipe_surface           *dst_surface,
-                          struct pipe_video_rect        *dst_area,
-                          struct pipe_fence_handle      **fence)
+static void
+vl_compositor_clear_layers(struct pipe_video_compositor *compositor)
 {
-   void *samplers[3];
+   struct vl_compositor *c = (struct vl_compositor *)compositor;
+   unsigned i, j;
 
    assert(compositor);
-   assert(src_sampler);
-   assert(src_area);
-   assert(dst_surface);
-   assert(dst_area);
 
-   if (compositor->fb_state.width != dst_surface->width) {
-      compositor->fb_inv_size.x = 1.0f / dst_surface->width;
-      compositor->fb_state.width = dst_surface->width;
-   }
-   if (compositor->fb_state.height != dst_surface->height) {
-      compositor->fb_inv_size.y = 1.0f / dst_surface->height;
-      compositor->fb_state.height = dst_surface->height;
+   c->used_layers = 0;
+   for ( i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
+      c->layers[i].fs = NULL;
+      for ( j = 0; j < 3; j++)
+         pipe_sampler_view_reference(&c->layers[i].sampler_views[j], NULL);
    }
+}
 
-   compositor->fb_state.cbufs[0] = dst_surface;
-
-   compositor->viewport.scale[0] = compositor->fb_state.width;
-   compositor->viewport.scale[1] = compositor->fb_state.height;
-   compositor->viewport.scale[2] = 1;
-   compositor->viewport.scale[3] = 1;
-   compositor->viewport.translate[0] = 0;
-   compositor->viewport.translate[1] = 0;
-   compositor->viewport.translate[2] = 0;
-   compositor->viewport.translate[3] = 0;
-
-   samplers[0] = samplers[1] = samplers[2] = compositor->sampler;
+static void
+vl_compositor_destroy(struct pipe_video_compositor *compositor)
+{
+   struct vl_compositor *c = (struct vl_compositor *)compositor;
+   assert(compositor);
 
-   compositor->pipe->set_framebuffer_state(compositor->pipe, &compositor->fb_state);
-   compositor->pipe->set_viewport_state(compositor->pipe, &compositor->viewport);
-   compositor->pipe->bind_fragment_sampler_states(compositor->pipe, 3, &samplers[0]);
-   compositor->pipe->bind_vs_state(compositor->pipe, compositor->vertex_shader);
-   compositor->pipe->set_vertex_buffers(compositor->pipe, 1, &compositor->vertex_buf);
-   compositor->pipe->bind_vertex_elements_state(compositor->pipe, compositor->vertex_elems_state);
-   compositor->pipe->set_constant_buffer(compositor->pipe, PIPE_SHADER_FRAGMENT, 0, compositor->fs_const_buf);
+   vl_compositor_clear_layers(compositor);
 
-   draw_layers(compositor, src_sampler, src_area, dst_area);
+   cleanup_buffers(c);
+   cleanup_shaders(c);
+   cleanup_pipe_state(c);
 
-   assert(!compositor->dirty_layers);
-   compositor->pipe->flush(compositor->pipe, fence);
+   FREE(compositor);
 }
 
-void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat)
+static void
+vl_compositor_set_csc_matrix(struct pipe_video_compositor *compositor, const float matrix[16])
 {
+   struct vl_compositor *c = (struct vl_compositor *)compositor;
    struct pipe_transfer *buf_transfer;
 
    assert(compositor);
 
    memcpy
    (
-      pipe_buffer_map(compositor->pipe, compositor->fs_const_buf,
+      pipe_buffer_map(c->pipe, c->csc_matrix,
                       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
                       &buf_transfer),
-		mat,
-		sizeof(struct fragment_shader_consts)
+		matrix,
+		sizeof(csc_matrix)
    );
 
-   pipe_buffer_unmap(compositor->pipe, buf_transfer);
+   pipe_buffer_unmap(c->pipe, buf_transfer);
+}
+
+static void
+vl_compositor_set_buffer_layer(struct pipe_video_compositor *compositor,
+                               unsigned layer,
+                               struct pipe_video_buffer *buffer,
+                               struct pipe_video_rect *src_rect,
+                               struct pipe_video_rect *dst_rect)
+{
+   struct vl_compositor *c = (struct vl_compositor *)compositor;
+   assert(compositor && buffer);
+
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+   /* Mark the slot used, select the video-buffer shader and let the buffer fill in its views. */
+   c->used_layers |= 1 << layer;
+   c->layers[layer].fs = c->fs_video_buffer;
+   buffer->get_sampler_views(buffer, c->layers[layer].sampler_views);
+   c->layers[layer].src_rect = src_rect ? *src_rect : default_rect(&c->layers[layer]); /* NULL: full size of view 0 */
+   c->layers[layer].dst_rect = dst_rect ? *dst_rect : default_rect(&c->layers[layer]); /* NULL: full size of view 0 */
+}
+
+static void
+vl_compositor_set_palette_layer(struct pipe_video_compositor *compositor,
+                                unsigned layer,
+                                struct pipe_sampler_view *indexes,
+                                struct pipe_sampler_view *palette,
+                                struct pipe_video_rect *src_rect,
+                                struct pipe_video_rect *dst_rect)
+{
+   struct vl_compositor *c = (struct vl_compositor *)compositor;
+   assert(compositor && indexes && palette);
+
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+   /* Mark the slot used and select the palette shader: view 0 = indexes, view 1 = palette. */
+   c->used_layers |= 1 << layer;
+   c->layers[layer].fs = c->fs_palette;
+   pipe_sampler_view_reference(&c->layers[layer].sampler_views[0], indexes);
+   pipe_sampler_view_reference(&c->layers[layer].sampler_views[1], palette);
+   pipe_sampler_view_reference(&c->layers[layer].sampler_views[2], NULL); /* third view stays empty */
+   c->layers[layer].src_rect = src_rect ? *src_rect : default_rect(&c->layers[layer]); /* NULL: full size of view 0 */
+   c->layers[layer].dst_rect = dst_rect ? *dst_rect : default_rect(&c->layers[layer]); /* NULL: full size of view 0 */
+}
+
+static void
+vl_compositor_set_rgba_layer(struct pipe_video_compositor *compositor,
+                             unsigned layer,
+                             struct pipe_sampler_view *rgba,
+                             struct pipe_video_rect *src_rect,
+                             struct pipe_video_rect *dst_rect)
+{
+   struct vl_compositor *c = (struct vl_compositor *)compositor;
+   assert(compositor && rgba);
+
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+   /* Mark the slot used and select the RGBA shader; only view 0 is populated. */
+   c->used_layers |= 1 << layer;
+   c->layers[layer].fs = c->fs_rgba;
+   pipe_sampler_view_reference(&c->layers[layer].sampler_views[0], rgba);
+   pipe_sampler_view_reference(&c->layers[layer].sampler_views[1], NULL); /* views 1 and 2 stay empty */
+   pipe_sampler_view_reference(&c->layers[layer].sampler_views[2], NULL);
+   c->layers[layer].src_rect = src_rect ? *src_rect : default_rect(&c->layers[layer]); /* NULL: full size of view 0 */
+   c->layers[layer].dst_rect = dst_rect ? *dst_rect : default_rect(&c->layers[layer]); /* NULL: full size of view 0 */
+}
+
+static void
+vl_compositor_render(struct pipe_video_compositor *compositor,
+                     enum pipe_mpeg12_picture_type picture_type, /* not read by this implementation */
+                     struct pipe_surface           *dst_surface,
+                     struct pipe_video_rect        *dst_area,
+                     struct pipe_fence_handle      **fence)
+{
+   struct vl_compositor *c = (struct vl_compositor *)compositor;
+   struct vertex2f dst_inv_size;
+   void *samplers[3];
+
+   assert(compositor);
+   assert(dst_surface);
+   assert(dst_area);
+   /* Framebuffer and viewport scale both follow the destination surface's size. */
+   c->fb_state.width = dst_surface->width;
+   c->fb_state.height = dst_surface->height;
+   c->fb_state.cbufs[0] = dst_surface;
+
+   c->viewport.scale[0] = dst_surface->width;
+   c->viewport.scale[1] = dst_surface->height;
+   /* Reciprocal of the surface size, handed to gen_vertex_data together with dst_area. */
+   dst_inv_size.x = 1.0f / dst_surface->width;
+   dst_inv_size.y = 1.0f / dst_surface->height;
+   /* The same sampler state object is bound to all three sampler slots. */
+   samplers[0] = samplers[1] = samplers[2] = c->sampler;
+
+   gen_vertex_data(c, dst_area, &dst_inv_size);
+   /* Bind the state that is common to every layer. */
+   c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
+   c->pipe->set_viewport_state(c->pipe, &c->viewport);
+   c->pipe->bind_fragment_sampler_states(c->pipe, 3, &samplers[0]);
+   c->pipe->bind_vs_state(c->pipe, c->vs);
+   c->pipe->set_vertex_buffers(c->pipe, 1, &c->vertex_buf);
+   c->pipe->bind_vertex_elements_state(c->pipe, c->vertex_elems_state);
+   c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, 0, c->csc_matrix);
+   c->pipe->bind_blend_state(c->pipe, c->blend);
+   /* Per used layer: bind its fragment shader and sampler views, then draw its quad. */
+   draw_layers(c);
+
+   c->pipe->flush(c->pipe, fence); /* the caller's fence pointer is passed through unchanged */
+}
+
+/* Create a compositor; returns NULL (with everything released) on any failure. */
+struct pipe_video_compositor *
+vl_compositor_init(struct pipe_video_context *vpipe, struct pipe_context *pipe)
+{
+   csc_matrix csc_matrix;
+   struct vl_compositor *compositor = CALLOC_STRUCT(vl_compositor);
+
+   if (!compositor)
+      return NULL;
+
+   compositor->base.context = vpipe;
+   compositor->base.destroy = vl_compositor_destroy;
+   compositor->base.set_csc_matrix = vl_compositor_set_csc_matrix;
+   compositor->base.clear_layers = vl_compositor_clear_layers;
+   compositor->base.set_buffer_layer = vl_compositor_set_buffer_layer;
+   compositor->base.set_palette_layer = vl_compositor_set_palette_layer;
+   compositor->base.set_rgba_layer = vl_compositor_set_rgba_layer;
+   compositor->base.render_picture = vl_compositor_render;
+   compositor->pipe = pipe;
+
+   if (!init_pipe_state(compositor))
+      goto error_free;
+   if (!init_shaders(compositor))
+      goto error_pipe_state;
+   if (!init_buffers(compositor))
+      goto error_shaders;
+
+   vl_compositor_clear_layers(&compositor->base);
+   vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, csc_matrix);
+   vl_compositor_set_csc_matrix(&compositor->base, csc_matrix);
+   return &compositor->base;
+error_shaders:
+   cleanup_shaders(compositor);
+error_pipe_state:
+   cleanup_pipe_state(compositor);
+error_free:
+   FREE(compositor);
+   return NULL;
 }
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index e10a663f860..0e4badb65d2 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -28,63 +28,46 @@
 #ifndef vl_compositor_h
 #define vl_compositor_h
 
-#include <pipe/p_compiler.h>
 #include <pipe/p_state.h>
+#include <pipe/p_video_context.h>
 #include <pipe/p_video_state.h>
-#include "vl_types.h"
-#include "vl_ycbcr_buffer.h"
 
 struct pipe_context;
-struct keymap;
 
 #define VL_COMPOSITOR_MAX_LAYERS 16
 
+struct vl_compositor_layer
+{
+   void *fs; /* fragment shader bound when this layer is drawn */
+   struct pipe_sampler_view *sampler_views[3]; /* filled from index 0; unused trailing entries are NULL */
+   struct pipe_video_rect src_rect; /* source rectangle within the layer's texture */
+   struct pipe_video_rect dst_rect; /* destination rectangle of the layer's quad */
+};
+
 struct vl_compositor
 {
+   struct pipe_video_compositor base;
    struct pipe_context *pipe;
 
    struct pipe_framebuffer_state fb_state;
-   struct vertex2f fb_inv_size;
-   void *sampler;
-   void *blend;
-   struct pipe_sampler_view *sampler_view;
-   void *vertex_shader;
-   struct
-   {
-      void *ycbcr_2_rgb;
-      void *rgb_2_rgb;
-      void *palette_2_rgb;
-   } fragment_shader;
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buf;
-   void *vertex_elems_state;
-   struct pipe_resource *fs_const_buf;
+   struct pipe_resource *csc_matrix;
 
-   struct pipe_sampler_view *layers[VL_COMPOSITOR_MAX_LAYERS];
-   struct pipe_sampler_view *palettes[VL_COMPOSITOR_MAX_LAYERS];
-   struct pipe_video_rect layer_src_rects[VL_COMPOSITOR_MAX_LAYERS];
-   struct pipe_video_rect layer_dst_rects[VL_COMPOSITOR_MAX_LAYERS];
-   unsigned dirty_layers;
-};
-
-bool vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe);
-
-void vl_compositor_cleanup(struct vl_compositor *compositor);
+   void *sampler;
+   void *blend;
+   void *vertex_elems_state;
 
-void vl_compositor_set_layers(struct vl_compositor *compositor,
-                              struct pipe_sampler_view *layers[],
-                              struct pipe_sampler_view *palettes[],
-                              struct pipe_video_rect *src_rects[],
-                              struct pipe_video_rect *dst_rects[],
-                              unsigned num_layers);
+   void *vs;
+   void *fs_video_buffer;
+   void *fs_palette;
+   void *fs_rgba;
 
-void vl_compositor_render(struct vl_compositor          *compositor,
-                          struct vl_ycbcr_sampler_views *src_sampler,
-                          struct pipe_video_rect        *src_area,
-                          struct pipe_surface           *dst_surface,
-                          struct pipe_video_rect        *dst_area,
-                          struct pipe_fence_handle      **fence);
+   unsigned used_layers:VL_COMPOSITOR_MAX_LAYERS;
+   struct vl_compositor_layer layers[VL_COMPOSITOR_MAX_LAYERS];
+};
 
-void vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float *mat);
+struct pipe_video_compositor *vl_compositor_init(struct pipe_video_context *vpipe,
+                                                 struct pipe_context *pipe);
 
 #endif /* vl_compositor_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index be598a8b55f..d8605398d49 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -221,6 +221,24 @@ vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
                                sv_refs, ne_start, ne_num, e_start, e_num, fence);
 }
 
+static void
+vl_mpeg12_buffer_get_sampler_views(struct pipe_video_buffer *buffer,
+                                   struct pipe_sampler_view *sampler_views[3])
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_ycbcr_sampler_views *samplers;
+
+   assert(buf);
+   /* The views are taken from the buffer's render_result. */
+   samplers = vl_ycbcr_get_sampler_views(&buf->render_result);
+
+   assert(samplers);
+   /* Store the Y, Cb and Cr views in slots 0, 1 and 2 of the caller's array. */
+   pipe_sampler_view_reference(&sampler_views[0], samplers->y);
+   pipe_sampler_view_reference(&sampler_views[1], samplers->cb);
+   pipe_sampler_view_reference(&sampler_views[2], samplers->cr);
+}
+
 static void
 vl_mpeg12_destroy(struct pipe_video_context *vpipe)
 {
@@ -236,7 +254,6 @@ vl_mpeg12_destroy(struct pipe_video_context *vpipe)
    ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast);
    ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
 
-   vl_compositor_cleanup(&ctx->compositor);
    vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
    vl_idct_cleanup(&ctx->idct_y);
    vl_idct_cleanup(&ctx->idct_c);
@@ -308,6 +325,7 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
    buffer->base.add_macroblocks = vl_mpeg12_buffer_add_macroblocks;
    buffer->base.unmap = vl_mpeg12_buffer_unmap;
    buffer->base.flush = vl_mpeg12_buffer_flush;
+   buffer->base.get_sampler_views = vl_mpeg12_buffer_get_sampler_views;
 
    buffer->vertex_bufs.individual.quad.stride = ctx->quads.stride;
    buffer->vertex_bufs.individual.quad.buffer_offset = ctx->quads.buffer_offset;
@@ -494,56 +512,14 @@ error_map:
    ctx->pipe->transfer_destroy(ctx->pipe, transfer);
 }
 
-static void
-vl_mpeg12_render_picture(struct pipe_video_context     *vpipe,
-                         struct pipe_video_buffer      *src_surface,
-                         struct pipe_video_rect        *src_area,
-                         enum pipe_mpeg12_picture_type picture_type,
-                         struct pipe_surface           *dst_surface,
-                         struct pipe_video_rect        *dst_area,
-                         struct pipe_fence_handle      **fence)
+static struct pipe_video_compositor *
+vl_mpeg12_create_compositor(struct pipe_video_context *vpipe)
 {
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)src_surface;
-   struct vl_ycbcr_sampler_views *sampler_views;
 
    assert(vpipe);
-   assert(src_surface);
-   assert(src_area);
-   assert(dst_surface);
-   assert(dst_area);
-
-   sampler_views = vl_ycbcr_get_sampler_views(&buf->render_result);
 
-   vl_compositor_render(&ctx->compositor, sampler_views, src_area,
-                        dst_surface, dst_area, fence);
-}
-
-static void
-vl_mpeg12_set_picture_layers(struct pipe_video_context *vpipe,
-                             struct pipe_sampler_view *layers[],
-                             struct pipe_sampler_view *palettes[],
-                             struct pipe_video_rect *src_rects[],
-                             struct pipe_video_rect *dst_rects[],
-                             unsigned num_layers)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-   assert((layers && src_rects && dst_rects) ||
-          (!layers && !src_rects && !dst_rects));
-
-   vl_compositor_set_layers(&ctx->compositor, layers, palettes, src_rects, dst_rects, num_layers);
-}
-
-static void
-vl_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe, const float *mat)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-
-   vl_compositor_set_csc_matrix(&ctx->compositor, mat);
+   return vl_compositor_init(vpipe, ctx->pipe);
 }
 
 static bool
@@ -707,11 +683,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    ctx->base.create_surface = vl_mpeg12_create_surface;
    ctx->base.create_sampler_view = vl_mpeg12_create_sampler_view;
    ctx->base.create_buffer = vl_mpeg12_create_buffer;
-   ctx->base.render_picture = vl_mpeg12_render_picture;
    ctx->base.clear_sampler = vl_mpeg12_clear_sampler;
    ctx->base.upload_sampler = vl_mpeg12_upload_sampler;
-   ctx->base.set_picture_layers = vl_mpeg12_set_picture_layers;
-   ctx->base.set_csc_matrix = vl_mpeg12_set_csc_matrix;
+   ctx->base.create_compositor = vl_mpeg12_create_compositor;
 
    ctx->pipe = pipe;
    ctx->pot_buffers = pot_buffers;
@@ -731,26 +705,20 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    if (!vl_mpeg12_mc_renderer_init(&ctx->mc, ctx->pipe, ctx->buffer_width, ctx->buffer_height))
       goto error_mc;
 
-   if (!vl_compositor_init(&ctx->compositor, ctx->pipe))
-      goto error_compositor;
-
    if (!init_pipe_state(ctx))
       goto error_pipe_state;
 
    return &ctx->base;
 
 error_pipe_state:
-      vl_compositor_cleanup(&ctx->compositor);
-
-error_compositor:
-      vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
+   vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
 
 error_mc:
-      vl_idct_cleanup(&ctx->idct_y);
-      vl_idct_cleanup(&ctx->idct_c);
+   vl_idct_cleanup(&ctx->idct_y);
+   vl_idct_cleanup(&ctx->idct_c);
 
 error_idct:
-      ctx->pipe->destroy(ctx->pipe);
-      FREE(ctx);
-      return NULL;
+   ctx->pipe->destroy(ctx->pipe);
+   FREE(ctx);
+   return NULL;
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index 3cc052941b2..a90110bf61b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -52,7 +52,6 @@ struct vl_mpeg12_context
 
    struct vl_idct idct_y, idct_c;
    struct vl_mpeg12_mc_renderer mc;
-   struct vl_compositor compositor;
 
    void *rast;
    void *dsa;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 7786f0a6f6f..dec524d5290 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -109,11 +109,17 @@ struct pipe_video_context
                          struct pipe_sampler_view *dst,
                          const struct pipe_box *dst_box,
                          const float *rgba);
+
    /**
     * Creates a buffer as decoding target
     */
    struct pipe_video_buffer *(*create_buffer)(struct pipe_video_context *vpipe);
 
+   /**
+    * Creates a video compositor
+    */
+   struct pipe_video_compositor *(*create_compositor)(struct pipe_video_context *vpipe);
+
    /**
     * Picture decoding and displaying
     */
@@ -124,38 +130,6 @@ struct pipe_video_context
                             struct pipe_buffer **bitstream_buf);
 #endif
 
-   /**
-    * render a video buffer to the frontbuffer
-    */
-   void (*render_picture)(struct pipe_video_context     *vpipe,
-                          struct pipe_video_buffer      *src_surface,
-                          struct pipe_video_rect        *src_area,
-                          enum pipe_mpeg12_picture_type picture_type,
-                          struct pipe_surface           *dst_surface,
-                          struct pipe_video_rect        *dst_area,
-                          struct pipe_fence_handle      **fence);
-
-   /*@}*/
-
-   /**
-    * Parameter-like states (or properties)
-    */
-   /*@{*/
-
-   /**
-    * set overlay samplers
-    */
-   void (*set_picture_layers)(struct pipe_video_context *vpipe,
-                              struct pipe_sampler_view *layers[],
-                              struct pipe_sampler_view *palettes[],
-                              struct pipe_video_rect *src_rects[],
-                              struct pipe_video_rect *dst_rects[],
-                              unsigned num_layers);
-
-   void (*set_csc_matrix)(struct pipe_video_context *vpipe, const float *mat);
-
-   /* TODO: Interface for scaling modes, post-processing, etc. */
-   /*@}*/
 };
 
 struct pipe_video_buffer
@@ -191,6 +165,74 @@ struct pipe_video_buffer
                  struct pipe_video_buffer *ref_frames[2],
                  struct pipe_fence_handle **fence);
 
+
+   void (*get_sampler_views)(struct pipe_video_buffer *buffer,
+                             struct pipe_sampler_view *sampler_views[3]);
+};
+
+struct pipe_video_compositor
+{
+   struct pipe_video_context* context;
+
+   /**
+    * destroy this compositor
+    */
+   void (*destroy)(struct pipe_video_compositor *compositor);
+
+   /**
+    * set yuv -> rgba conversion matrix
+    */
+   void (*set_csc_matrix)(struct pipe_video_compositor *compositor, const float mat[16]);
+
+   /**
+    * set overlay samplers
+    */
+   /*@{*/
+
+   /**
+    * reset all currently set layers
+    */
+   void (*clear_layers)(struct pipe_video_compositor *compositor);
+
+   /**
+    * set a video buffer as a layer to render
+    */
+   void (*set_buffer_layer)(struct pipe_video_compositor *compositor,
+                            unsigned layer,
+                            struct pipe_video_buffer *buffer,
+                            struct pipe_video_rect *src_rect,
+                            struct pipe_video_rect *dst_rect);
+
+   /**
+    * set a paletted sampler as a layer to render
+    */
+   void (*set_palette_layer)(struct pipe_video_compositor *compositor,
+                             unsigned layer,
+                             struct pipe_sampler_view *indexes,
+                             struct pipe_sampler_view *palette,
+                             struct pipe_video_rect *src_rect,
+                             struct pipe_video_rect *dst_rect);
+
+   /**
+    * set a rgba sampler as a layer to render
+    */
+   void (*set_rgba_layer)(struct pipe_video_compositor *compositor,
+                          unsigned layer,
+                          struct pipe_sampler_view *rgba,
+                          struct pipe_video_rect *src_rect,
+                          struct pipe_video_rect *dst_rect);
+
+   /*@}*/
+
+   /**
+    * render the layers to the frontbuffer
+    */
+   void (*render_picture)(struct pipe_video_compositor  *compositor,
+                          enum pipe_mpeg12_picture_type picture_type,
+                          struct pipe_surface           *dst_surface,
+                          struct pipe_video_rect        *dst_area,
+                          struct pipe_fence_handle      **fence);
+
 };
 
 #ifdef __cplusplus
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index bcfd085de39..bdcba72d7cf 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -241,6 +241,15 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       return BadAlloc;
    }
 
+   context_priv->compositor = vctx->vpipe->create_compositor(vctx->vpipe);
+   if (!context_priv->compositor) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL compositor.\n");
+      vl_video_destroy(vctx);
+      vl_screen_destroy(vscreen);
+      FREE(context_priv);
+      return BadAlloc;
+   }
+
    /* TODO: Define some Xv attribs to allow users to specify color standard, procamp */
    vl_csc_get_matrix
    (
@@ -248,7 +257,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       VL_CSC_COLOR_STANDARD_IDENTITY : VL_CSC_COLOR_STANDARD_BT_601,
       NULL, true, csc
    );
-   vctx->vpipe->set_csc_matrix(vctx->vpipe, csc);
+   context_priv->compositor->set_csc_matrix(context_priv->compositor, csc);
 
    context_priv->vctx = vctx;
    context_priv->subpicture_max_width = subpic_max_w;
@@ -286,6 +295,7 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
    context_priv = context->privData;
    vctx = context_priv->vctx;
    vscreen = vctx->vscreen;
+   context_priv->compositor->destroy(context_priv->compositor);
    vl_video_destroy(vctx);
    vl_screen_destroy(vscreen);
    FREE(context_priv);
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 23f97b3dac5..679b5cf6651 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -343,6 +343,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    static int dump_window = -1;
 
    struct pipe_video_context *vpipe;
+   struct pipe_video_compositor *compositor;
+
    XvMCSurfacePrivate *surface_priv;
    XvMCContextPrivate *context_priv;
    XvMCSubpicturePrivate *subpicture_priv;
@@ -383,27 +385,30 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 
    subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;
    vpipe = context_priv->vctx->vpipe;
+   compositor = context_priv->compositor;
+
+   unmap_and_flush_surface(surface_priv);
+
+   compositor->clear_layers(compositor);
+   compositor->set_buffer_layer(compositor, 0, surface_priv->pipe_buffer, &src_rect, NULL);
 
    if (subpicture_priv) {
       struct pipe_video_rect src_rect = {surface_priv->subx, surface_priv->suby, surface_priv->subw, surface_priv->subh};
       struct pipe_video_rect dst_rect = {surface_priv->surfx, surface_priv->surfy, surface_priv->surfw, surface_priv->surfh};
-      struct pipe_video_rect *src_rects[1] = {&src_rect};
-      struct pipe_video_rect *dst_rects[1] = {&dst_rect};
 
       XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p has subpicture %p.\n", surface, surface_priv->subpicture);
 
       assert(subpicture_priv->surface == surface);
-      vpipe->set_picture_layers(vpipe, &subpicture_priv->sampler, &subpicture_priv->palette, src_rects, dst_rects, 1);
+      if (subpicture_priv->palette)
+         compositor->set_palette_layer(compositor, 1, subpicture_priv->sampler, subpicture_priv->palette, &src_rect, &dst_rect);
+      else
+         compositor->set_rgba_layer(compositor, 1, subpicture_priv->sampler, &src_rect, &dst_rect);
 
       surface_priv->subpicture = NULL;
       subpicture_priv->surface = NULL;
    }
-   else
-      vpipe->set_picture_layers(vpipe, NULL, NULL, NULL, NULL, 0);
 
-   unmap_and_flush_surface(surface_priv);
-   vpipe->render_picture(vpipe, surface_priv->pipe_buffer, &src_rect, PictureToPipe(flags),
-                         drawable_surface, &dst_rect, &surface_priv->disp_fence);
+   compositor->render_picture(compositor, PictureToPipe(flags), drawable_surface, &dst_rect, &surface_priv->disp_fence);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 330c8c2cf9d..29518b36dbf 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -42,6 +42,8 @@ struct pipe_fence_handle;
 typedef struct
 {
    struct vl_context *vctx;
+   struct pipe_video_compositor *compositor;
+
    unsigned short subpicture_max_width;
    unsigned short subpicture_max_height;
 } XvMCContextPrivate;
-- 
cgit v1.2.3


From e6176ce3719e6c6e88d31ae7307154386e83553b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 3 Apr 2011 19:57:49 +0200
Subject: [g3dvl] some more debugging output in xvmc st

---
 src/gallium/state_trackers/xorg/xvmc/surface.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 679b5cf6651..efbebaa4ccf 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -233,7 +233,8 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    XvMCSurfacePrivate *future_surface_priv;
    struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
 
-   XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p.\n", target_surface);
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p, with past %p and future %p\n",
+            target_surface, past_surface, future_surface);
 
    assert(dpy);
 
@@ -320,6 +321,8 @@ Status XvMCFlushSurface(Display *dpy, XvMCSurface *surface)
    // don't call flush here, because this is usually
    // called once for every slice instead of every frame
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Flushing surface %p\n", surface);
+
    return Success;
 }
 
@@ -331,6 +334,8 @@ Status XvMCSyncSurface(Display *dpy, XvMCSurface *surface)
    if (!surface)
       return XvMCBadSurface;
 
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Syncing surface %p\n", surface);
+
    return Success;
 }
 
-- 
cgit v1.2.3


From 087e17f52e6391cecc002066f53dadc8f1b53074 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 3 Apr 2011 20:49:15 +0200
Subject: [g3dvl] fix vertex buffer size calculation

---
 src/gallium/auxiliary/vl/vl_mpeg12_context.c | 4 ++--
 src/gallium/auxiliary/vl/vl_mpeg12_context.h | 1 -
 src/gallium/auxiliary/vl/vl_vertex_buffers.c | 1 +
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index d8605398d49..0eab9e3c614 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -332,7 +332,8 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
    pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, ctx->quads.buffer);
 
    buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, ctx->pipe,
-                                                      ctx->vertex_buffer_size);
+                                                      ctx->buffer_width / MACROBLOCK_WIDTH *
+                                                      ctx->buffer_height / MACROBLOCK_HEIGHT);
    if (!buffer->vertex_bufs.individual.stream.buffer)
       goto error_vertex_stream;
 
@@ -691,7 +692,6 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    ctx->pot_buffers = pot_buffers;
 
    ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2);
-   ctx->vertex_buffer_size = width / MACROBLOCK_WIDTH * height / MACROBLOCK_HEIGHT;
    ctx->ves_y = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_X);
    ctx->ves_cb = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Y);
    ctx->ves_cr = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Z);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index a90110bf61b..698522f16ea 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -47,7 +47,6 @@ struct vl_mpeg12_context
    const unsigned (*empty_block_mask)[3][2][2];
 
    struct pipe_vertex_buffer quads;
-   unsigned vertex_buffer_size;
    void *ves_y, *ves_cb, *ves_cr;
 
    struct vl_idct idct_y, idct_c;
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 59aa1e9db75..1094b76ec0d 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -264,6 +264,7 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *
 
    assert(buffer);
    assert(mb);
+   assert(buffer->num_not_empty + buffer->num_empty < buffer->size);
 
    if(mb->cbp)
       stream = buffer->start + buffer->num_not_empty++;
-- 
cgit v1.2.3


From ebd564587a46891cb2729c6a0ed84b7c27dc23ec Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 3 Apr 2011 22:01:15 +0200
Subject: g3dvl/vdpau: some more indentation fixes

---
 src/gallium/state_trackers/vdpau/device.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 0f9b7b6f5d8..d3314d0d4bf 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -32,7 +32,6 @@
 #include <util/u_debug.h>
 #include "vdpau_private.h"
 
-
 PUBLIC VdpStatus
 vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
                           VdpGetProcAddress **get_proc_address)
@@ -57,8 +56,7 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
    dev->display = display;
    dev->screen = screen;
    dev->vscreen = vl_screen_create(display, screen);
-   if (!dev->vscreen)
-	   {
+   if (!dev->vscreen) {
       ret = VDP_STATUS_RESOURCES;
       goto no_vscreen;
    }
@@ -107,14 +105,14 @@ vlVdpPresentationQueueTargetCreateX11(VdpDevice device, Drawable drawable,
    pqt->device = dev;
    pqt->drawable = drawable;
 
-	*target = vlAddDataHTAB(pqt);
+   *target = vlAddDataHTAB(pqt);
    if (*target == 0) {
       ret = VDP_STATUS_ERROR;
       goto no_handle;
    }
 
-
    return VDP_STATUS_OK;
+
 no_handle:
    FREE(dev);
    return ret;
@@ -191,5 +189,6 @@ vlVdpGetErrorString (VdpStatus status)
       all supplied surfaces must have been created within the context of the same VdpDevice object. \
       This error is raised if they were not.");
    _ERROR_TYPE(VDP_STATUS_ERROR,"A catch-all error, used when no other error code applies.");
+   default: return "Unknown Error";
    }
 }
-- 
cgit v1.2.3


From 3a2b906805985e0a4258bcbaed4cdff758875514 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 4 Apr 2011 23:28:18 +0200
Subject: [g3dvl] rename ycbcr buffer to video buffer and add some more
 functionality

---
 src/gallium/auxiliary/Makefile               |   2 +-
 src/gallium/auxiliary/vl/vl_idct.c           |   1 -
 src/gallium/auxiliary/vl/vl_idct.h           |   2 -
 src/gallium/auxiliary/vl/vl_mpeg12_context.c | 191 +++++++++++++-------------
 src/gallium/auxiliary/vl/vl_mpeg12_context.h |  15 ++-
 src/gallium/auxiliary/vl/vl_video_buffer.c   | 190 ++++++++++++++++++++++++++
 src/gallium/auxiliary/vl/vl_video_buffer.h   |  84 ++++++++++++
 src/gallium/auxiliary/vl/vl_ycbcr_buffer.c   | 195 ---------------------------
 src/gallium/auxiliary/vl/vl_ycbcr_buffer.h   |  89 ------------
 9 files changed, 378 insertions(+), 391 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_video_buffer.c
 create mode 100644 src/gallium/auxiliary/vl/vl_video_buffer.h
 delete mode 100644 src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
 delete mode 100644 src/gallium/auxiliary/vl/vl_ycbcr_buffer.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 0464df84e12..b4ad059ec90 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -154,7 +154,7 @@ C_SOURCES = \
 	vl/vl_csc.c \
         vl/vl_idct.c \
         vl/vl_vertex_buffers.c \
-        vl/vl_ycbcr_buffer.c
+        vl/vl_video_buffer.c
 
 GALLIVM_SOURCES = \
         gallivm/lp_bld_arit.c \
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 6bebac1e88a..c92659bc5b0 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -27,7 +27,6 @@
 
 #include "vl_idct.h"
 #include "vl_vertex_buffers.h"
-#include "vl_ycbcr_buffer.h"
 #include "vl_defines.h"
 #include "util/u_draw.h"
 #include <assert.h>
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 2d6778d7ec4..fedebd3ff21 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -29,8 +29,6 @@
 #define vl_idct_h
 
 #include <pipe/p_state.h>
-#include "vl_vertex_buffers.h"
-#include "vl_ycbcr_buffer.h"
 
 /* shader based inverse distinct cosinus transformation
  * expect usage of vl_vertex_buffers as a todo list
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
index 0eab9e3c614..a83c240bcd9 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
@@ -62,7 +62,7 @@ upload_buffer(struct vl_mpeg12_context *ctx,
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
          if (mb->cbp & (*ctx->empty_block_mask)[0][y][x]) {
-            vl_idct_add_block(&buffer->idct_y, mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
+            vl_idct_add_block(&buffer->idct[0], mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
             blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
          }
       }
@@ -73,10 +73,7 @@ upload_buffer(struct vl_mpeg12_context *ctx,
 
    for (tb = 1; tb < 3; ++tb) {
       if (mb->cbp & (*ctx->empty_block_mask)[tb][0][0]) {
-         if(tb == 1)
-            vl_idct_add_block(&buffer->idct_cb, mb->mbx, mb->mby, blocks);
-         else
-            vl_idct_add_block(&buffer->idct_cr, mb->mbx, mb->mby, blocks);
+         vl_idct_add_block(&buffer->idct[tb], mb->mbx, mb->mby, blocks);
          blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
       }
    }
@@ -89,16 +86,16 @@ vl_mpeg12_buffer_destroy(struct pipe_video_buffer *buffer)
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)buf->base.context;
    assert(buf && ctx);
 
-   vl_ycbcr_buffer_cleanup(&buf->idct_source);
-   vl_ycbcr_buffer_cleanup(&buf->idct_2_mc);
-   vl_ycbcr_buffer_cleanup(&buf->render_result);
+   vl_video_buffer_cleanup(&buf->idct_source);
+   vl_video_buffer_cleanup(&buf->idct_2_mc);
+   vl_video_buffer_cleanup(&buf->render_result);
    vl_vb_cleanup(&buf->vertex_stream);
-   vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct_y);
-   vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct_cb);
-   vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct_cr);
-   vl_mpeg12_mc_cleanup_buffer(&buf->mc_y);
-   vl_mpeg12_mc_cleanup_buffer(&buf->mc_cb);
-   vl_mpeg12_mc_cleanup_buffer(&buf->mc_cr);
+   vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct[0]);
+   vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct[1]);
+   vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct[2]);
+   vl_mpeg12_mc_cleanup_buffer(&buf->mc[0]);
+   vl_mpeg12_mc_cleanup_buffer(&buf->mc[1]);
+   vl_mpeg12_mc_cleanup_buffer(&buf->mc[2]);
 
    FREE(buf);
 }
@@ -114,9 +111,9 @@ vl_mpeg12_buffer_map(struct pipe_video_buffer *buffer)
    assert(ctx);
 
    vl_vb_map(&buf->vertex_stream, ctx->pipe);
-   vl_idct_map_buffers(&ctx->idct_y, &buf->idct_y);
-   vl_idct_map_buffers(&ctx->idct_c, &buf->idct_cb);
-   vl_idct_map_buffers(&ctx->idct_c, &buf->idct_cr);
+   vl_idct_map_buffers(&ctx->idct_y, &buf->idct[0]);
+   vl_idct_map_buffers(&ctx->idct_c, &buf->idct[1]);
+   vl_idct_map_buffers(&ctx->idct_c, &buf->idct[2]);
 }
 
 static void
@@ -155,9 +152,9 @@ vl_mpeg12_buffer_unmap(struct pipe_video_buffer *buffer)
    assert(ctx);
 
    vl_vb_unmap(&buf->vertex_stream, ctx->pipe);
-   vl_idct_unmap_buffers(&ctx->idct_y, &buf->idct_y);
-   vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct_cb);
-   vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct_cr);
+   vl_idct_unmap_buffers(&ctx->idct_y, &buf->idct[0]);
+   vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct[1]);
+   vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct[2]);
 }
 
 static void
@@ -169,56 +166,40 @@ vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
    struct vl_mpeg12_buffer *past = (struct vl_mpeg12_buffer *)refs[0];
    struct vl_mpeg12_buffer *future = (struct vl_mpeg12_buffer *)refs[1];
 
-   struct vl_ycbcr_surfaces *surfaces;
-   struct vl_ycbcr_sampler_views *sv_past;
-   struct vl_ycbcr_sampler_views *sv_future;
+   vl_surfaces *surfaces;
+   vl_sampler_views *sv_past;
+   vl_sampler_views *sv_future;
 
    struct pipe_sampler_view *sv_refs[2];
    unsigned ne_start, ne_num, e_start, e_num;
    struct vl_mpeg12_context *ctx;
+   unsigned i;
 
    assert(buf);
 
    ctx = (struct vl_mpeg12_context *)buf->base.context;
    assert(ctx);
 
-   surfaces = vl_ycbcr_get_surfaces(&buf->render_result);
+   surfaces = vl_video_buffer_surfaces(&buf->render_result);
 
-   sv_past = past ? vl_ycbcr_get_sampler_views(&past->render_result) : NULL;
-   sv_future = future ? vl_ycbcr_get_sampler_views(&future->render_result) : NULL;
+   sv_past = past ? vl_video_buffer_sampler_views(&past->render_result) : NULL;
+   sv_future = future ? vl_video_buffer_sampler_views(&future->render_result) : NULL;
 
    vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
 
    ctx->pipe->set_vertex_buffers(ctx->pipe, 2, buf->vertex_bufs.all);
    ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
 
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves[i]);
+      vl_idct_flush(i == 0 ? &ctx->idct_y : &ctx->idct_c, &buf->idct[i], ne_num);
 
-   ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_y);
-   vl_idct_flush(&ctx->idct_y, &buf->idct_y, ne_num);
-
-   sv_refs[0] = sv_past ? sv_past->y : NULL;
-   sv_refs[1] = sv_future ? sv_future->y : NULL;
-
-   vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_y, surfaces->y,
-                               sv_refs, ne_start, ne_num, e_start, e_num, fence);
-
-   ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_cb);
-   vl_idct_flush(&ctx->idct_c, &buf->idct_cb, ne_num);
-
-   sv_refs[0] = sv_past ? sv_past->cb : NULL;
-   sv_refs[1] = sv_future ? sv_future->cb : NULL;
-
-   vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_cb, surfaces->cb,
-                               sv_refs, ne_start, ne_num, e_start, e_num, fence);
+      sv_refs[0] = sv_past ? (*sv_past)[i] : NULL;
+      sv_refs[1] = sv_future ? (*sv_future)[i] : NULL;
 
-   ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves_cr);
-   vl_idct_flush(&ctx->idct_c, &buf->idct_cr, ne_num);
-
-   sv_refs[0] = sv_past ? sv_past->cr : NULL;
-   sv_refs[1] = sv_future ? sv_future->cr : NULL;
-
-   vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc_cr, surfaces->cr,
-                               sv_refs, ne_start, ne_num, e_start, e_num, fence);
+      vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc[i], (*surfaces)[i],
+                                  sv_refs, ne_start, ne_num, e_start, e_num, fence);
+   }
 }
 
 static void
@@ -226,17 +207,17 @@ vl_mpeg12_buffer_get_sampler_views(struct pipe_video_buffer *buffer,
                                    struct pipe_sampler_view *sampler_views[3])
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_ycbcr_sampler_views *samplers;
+   vl_sampler_views *samplers;
+   unsigned i;
 
    assert(buf);
 
-   samplers = vl_ycbcr_get_sampler_views(&buf->render_result);
+   samplers = vl_video_buffer_sampler_views(&buf->render_result);
 
    assert(samplers);
 
-   pipe_sampler_view_reference(&sampler_views[0], samplers->y);
-   pipe_sampler_view_reference(&sampler_views[1], samplers->cb);
-   pipe_sampler_view_reference(&sampler_views[2], samplers->cr);
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      pipe_sampler_view_reference(&sampler_views[i], (*samplers)[i]);
 }
 
 static void
@@ -257,9 +238,9 @@ vl_mpeg12_destroy(struct pipe_video_context *vpipe)
    vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
    vl_idct_cleanup(&ctx->idct_y);
    vl_idct_cleanup(&ctx->idct_c);
-   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_y);
-   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_cb);
-   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves_cr);
+   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves[0]);
+   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves[1]);
+   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves[2]);
    pipe_resource_reference(&ctx->quads.buffer, NULL);
    ctx->pipe->destroy(ctx->pipe);
 
@@ -307,11 +288,29 @@ vl_mpeg12_create_sampler_view(struct pipe_video_context *vpipe,
 static struct pipe_video_buffer *
 vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
 {
+   const enum pipe_format idct_source_formats[3] = {
+      PIPE_FORMAT_R16G16B16A16_SNORM,
+      PIPE_FORMAT_R16G16B16A16_SNORM,
+      PIPE_FORMAT_R16G16B16A16_SNORM
+   };
+
+   const enum pipe_format idct_2_mc_formats[3] = {
+      PIPE_FORMAT_R16_SNORM,
+      PIPE_FORMAT_R16_SNORM,
+      PIPE_FORMAT_R16_SNORM
+   };
+
+   const enum pipe_format render_result_formats[3] = {
+      PIPE_FORMAT_R8_SNORM,
+      PIPE_FORMAT_R8_SNORM,
+      PIPE_FORMAT_R8_SNORM
+   };
+
    struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
    struct vl_mpeg12_buffer *buffer;
 
-   struct vl_ycbcr_sampler_views *idct_views, *mc_views;
-   struct vl_ycbcr_surfaces *idct_surfaces;
+   vl_sampler_views *idct_views, *mc_views;
+   vl_surfaces *idct_surfaces;
 
    assert(ctx);
 
@@ -337,88 +336,88 @@ vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
    if (!buffer->vertex_bufs.individual.stream.buffer)
       goto error_vertex_stream;
 
-   if (!vl_ycbcr_buffer_init(&buffer->idct_source, ctx->pipe,
-                             ctx->buffer_width, ctx->buffer_height,
-                             ctx->base.chroma_format,
-                             PIPE_FORMAT_R16G16B16A16_SNORM,
+   if (!vl_video_buffer_init(&buffer->idct_source, ctx->pipe,
+                             ctx->buffer_width / 4, ctx->buffer_height, 1,
+                             ctx->base.chroma_format, 3,
+                             idct_source_formats,
                              PIPE_USAGE_STREAM))
       goto error_idct_source;
 
-   if (!vl_ycbcr_buffer_init(&buffer->idct_2_mc, ctx->pipe,
-                             ctx->buffer_width, ctx->buffer_height,
-                             ctx->base.chroma_format,
-                             PIPE_FORMAT_R16_SNORM,
+   if (!vl_video_buffer_init(&buffer->idct_2_mc, ctx->pipe,
+                             ctx->buffer_width, ctx->buffer_height, 1,
+                             ctx->base.chroma_format, 3,
+                             idct_2_mc_formats,
                              PIPE_USAGE_STATIC))
       goto error_idct_2_mc;
 
-   if (!vl_ycbcr_buffer_init(&buffer->render_result, ctx->pipe,
-                             ctx->buffer_width, ctx->buffer_height,
-                             ctx->base.chroma_format,
-                             PIPE_FORMAT_R8_SNORM,
+   if (!vl_video_buffer_init(&buffer->render_result, ctx->pipe,
+                             ctx->buffer_width, ctx->buffer_height, 1,
+                             ctx->base.chroma_format, 3,
+                             render_result_formats,
                              PIPE_USAGE_STATIC))
       goto error_render_result;
 
-   idct_views = vl_ycbcr_get_sampler_views(&buffer->idct_source);
+   idct_views = vl_video_buffer_sampler_views(&buffer->idct_source);
    if (!idct_views)
       goto error_idct_views;
 
-   idct_surfaces = vl_ycbcr_get_surfaces(&buffer->idct_2_mc);
+   idct_surfaces = vl_video_buffer_surfaces(&buffer->idct_2_mc);
    if (!idct_surfaces)
       goto error_idct_surfaces;
 
-   if (!vl_idct_init_buffer(&ctx->idct_y, &buffer->idct_y,
-                            idct_views->y, idct_surfaces->y))
+   if (!vl_idct_init_buffer(&ctx->idct_y, &buffer->idct[0],
+                            (*idct_views)[0], (*idct_surfaces)[0]))
       goto error_idct_y;
 
-   if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct_cb,
-                            idct_views->cb, idct_surfaces->cb))
+   if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct[1],
+                            (*idct_views)[1], (*idct_surfaces)[1]))
       goto error_idct_cb;
 
-   if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct_cr,
-                            idct_views->cr, idct_surfaces->cr))
+   if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct[2],
+                            (*idct_views)[2], (*idct_surfaces)[2]))
       goto error_idct_cr;
 
-   mc_views = vl_ycbcr_get_sampler_views(&buffer->idct_2_mc);
+   mc_views = vl_video_buffer_sampler_views(&buffer->idct_2_mc);
    if (!mc_views)
       goto error_mc_views;
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_y, mc_views->y))
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc[0], (*mc_views)[0]))
       goto error_mc_y;
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cb, mc_views->cb))
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc[1], (*mc_views)[1]))
       goto error_mc_cb;
 
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc_cr, mc_views->cr))
+   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc[2], (*mc_views)[2]))
       goto error_mc_cr;
 
    return &buffer->base;
 
 error_mc_cr:
-   vl_mpeg12_mc_cleanup_buffer(&buffer->mc_cb);
+   vl_mpeg12_mc_cleanup_buffer(&buffer->mc[1]);
 
 error_mc_cb:
-   vl_mpeg12_mc_cleanup_buffer(&buffer->mc_y);
+   vl_mpeg12_mc_cleanup_buffer(&buffer->mc[0]);
 
 error_mc_y:
 error_mc_views:
-   vl_idct_cleanup_buffer(&ctx->idct_c, &buffer->idct_cr);
+   vl_idct_cleanup_buffer(&ctx->idct_c, &buffer->idct[2]);
 
 error_idct_cr:
-   vl_idct_cleanup_buffer(&ctx->idct_c, &buffer->idct_cb);
+   vl_idct_cleanup_buffer(&ctx->idct_c, &buffer->idct[1]);
 
 error_idct_cb:
-   vl_idct_cleanup_buffer(&ctx->idct_y, &buffer->idct_y);
+   vl_idct_cleanup_buffer(&ctx->idct_y, &buffer->idct[0]);
 
 error_idct_y:
 error_idct_surfaces:
 error_idct_views:
-   vl_ycbcr_buffer_cleanup(&buffer->render_result);
+   vl_video_buffer_cleanup(&buffer->render_result);
 
 error_render_result:
-   vl_ycbcr_buffer_cleanup(&buffer->idct_2_mc);
+   vl_video_buffer_cleanup(&buffer->idct_2_mc);
 
 error_idct_2_mc:
-   vl_ycbcr_buffer_cleanup(&buffer->idct_source);
+   vl_video_buffer_cleanup(&buffer->idct_source);
 
 error_idct_source:
    vl_vb_cleanup(&buffer->vertex_stream);
@@ -692,9 +691,9 @@ vl_create_mpeg12_context(struct pipe_context *pipe,
    ctx->pot_buffers = pot_buffers;
 
    ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2);
-   ctx->ves_y = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_X);
-   ctx->ves_cb = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Y);
-   ctx->ves_cr = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Z);
+   ctx->ves[0] = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_X);
+   ctx->ves[1] = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Y);
+   ctx->ves[2] = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Z);
 
    ctx->buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
    ctx->buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
index 698522f16ea..94a5dad5571 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
@@ -32,7 +32,8 @@
 #include "vl_idct.h"
 #include "vl_mpeg12_mc_renderer.h"
 #include "vl_compositor.h"
-#include "vl_ycbcr_buffer.h"
+#include "vl_video_buffer.h"
+#include "vl_vertex_buffers.h"
 
 struct pipe_screen;
 struct pipe_context;
@@ -47,7 +48,7 @@ struct vl_mpeg12_context
    const unsigned (*empty_block_mask)[3][2][2];
 
    struct pipe_vertex_buffer quads;
-   void *ves_y, *ves_cb, *ves_cr;
+   void *ves[VL_MAX_PLANES];
 
    struct vl_idct idct_y, idct_c;
    struct vl_mpeg12_mc_renderer mc;
@@ -61,9 +62,9 @@ struct vl_mpeg12_buffer
 {
    struct pipe_video_buffer base;
 
-   struct vl_ycbcr_buffer idct_source;
-   struct vl_ycbcr_buffer idct_2_mc;
-   struct vl_ycbcr_buffer render_result;
+   struct vl_video_buffer idct_source;
+   struct vl_video_buffer idct_2_mc;
+   struct vl_video_buffer render_result;
 
    struct vl_vertex_buffer vertex_stream;
 
@@ -75,8 +76,8 @@ struct vl_mpeg12_buffer
       } individual;
    } vertex_bufs;
 
-   struct vl_idct_buffer idct_y, idct_cb, idct_cr;
-   struct vl_mpeg12_mc_buffer mc_y, mc_cb, mc_cr;
+   struct vl_idct_buffer idct[VL_MAX_PLANES];
+   struct vl_mpeg12_mc_buffer mc[VL_MAX_PLANES];
 };
 
 /* drivers can call this function in their pipe_video_context constructors and pass it
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
new file mode 100644
index 00000000000..f0b3d192eb5
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -0,0 +1,190 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vl_video_buffer.h"
+#include <util/u_format.h>
+#include <util/u_inlines.h>
+#include <util/u_sampler.h>
+#include <pipe/p_screen.h>
+#include <pipe/p_context.h>
+#include <assert.h>
+
+bool vl_video_buffer_init(struct vl_video_buffer *buffer,
+                          struct pipe_context *pipe,
+                          unsigned width, unsigned height, unsigned depth,
+                          enum pipe_video_chroma_format chroma_format,
+                          unsigned num_planes,
+                          const enum pipe_format resource_format[VL_MAX_PLANES],
+                          unsigned usage)
+{
+   struct pipe_resource templ;
+   unsigned i;
+   /* creates one PIPE_TEXTURE_2D resource per plane; true on success, false otherwise */
+   assert(buffer && pipe);
+   assert(num_planes > 0 && num_planes <= VL_MAX_PLANES);
+
+   memset(buffer, 0, sizeof(struct vl_video_buffer));
+   buffer->pipe = pipe;
+   buffer->num_planes = num_planes;
+   /* plane 0 uses the width/height/depth exactly as passed in */
+   memset(&templ, 0, sizeof(templ));
+   templ.target = PIPE_TEXTURE_2D;
+   templ.format = resource_format[0];
+   templ.width0 = width;
+   templ.height0 = height;
+   templ.depth0 = depth;
+   templ.array_size = 1;
+   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   templ.usage = usage;
+
+   buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[0])
+      goto error;
+
+   if (num_planes == 1) {
+      assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444);
+      return true;
+   }
+   /* planes 1 and 2 share one template, reduced according to chroma_format */
+   templ.format = resource_format[1];
+   if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      if (depth > 1)
+         templ.depth0 /= 2;
+      else
+         templ.width0 /= 2;
+      templ.height0 /= 2;
+   } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      if (depth > 1)
+         templ.depth0 /= 2;
+      else
+         templ.height0 /= 2; /* NOTE(review): 4:2:2 usually halves the horizontal size - confirm */
+   }
+
+   buffer->resources[1] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[1])
+      goto error;
+
+   if (num_planes == 2)
+      return true;
+
+   templ.format = resource_format[2];
+   buffer->resources[2] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[2])
+      goto error;
+
+   return true;
+
+error: /* slots never filled are still NULL from the memset above */
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      pipe_resource_reference(&buffer->resources[i], NULL);
+
+   return false;
+}
+
+/* single-component formats: route the red channel through every swizzle */
+static inline void adjust_swizzle(struct pipe_sampler_view *sv_templ)
+{
+   if (util_format_get_nr_components(sv_templ->format) != 1)
+      return;
+   sv_templ->swizzle_r = PIPE_SWIZZLE_RED;
+   sv_templ->swizzle_g = PIPE_SWIZZLE_RED;
+   sv_templ->swizzle_b = PIPE_SWIZZLE_RED;
+   sv_templ->swizzle_a = PIPE_SWIZZLE_RED;
+}
+
+/* Lazily create the default sampler view of every used plane; NULL on failure. */
+vl_sampler_views *vl_video_buffer_sampler_views(struct vl_video_buffer *buffer)
+{
+   struct pipe_sampler_view view_templ;
+   struct pipe_resource *res;
+   struct pipe_context *ctx;
+   unsigned plane;
+
+   assert(buffer);
+   ctx = buffer->pipe;
+
+   for (plane = 0; plane < buffer->num_planes; ++plane) {
+      if (buffer->sampler_views[plane])
+         continue; /* already created by an earlier call */
+      res = buffer->resources[plane];
+      memset(&view_templ, 0, sizeof(view_templ));
+      u_sampler_view_default_template(&view_templ, res, res->format);
+      adjust_swizzle(&view_templ);
+      buffer->sampler_views[plane] = ctx->create_sampler_view(ctx, res, &view_templ);
+      if (!buffer->sampler_views[plane])
+         goto error;
+   }
+   return &buffer->sampler_views;
+
+error: /* drop every view, including ones kept from earlier calls */
+   for (plane = 0; plane < buffer->num_planes; ++plane)
+      pipe_sampler_view_reference(&buffer->sampler_views[plane], NULL);
+   return NULL;
+}
+
+/* Lazily create the default surface of every used plane; NULL on failure. */
+vl_surfaces *vl_video_buffer_surfaces(struct vl_video_buffer *buffer)
+{
+   struct pipe_surface templ;
+   struct pipe_resource *res;
+   struct pipe_context *ctx;
+   unsigned plane;
+
+   assert(buffer);
+   ctx = buffer->pipe;
+
+   for (plane = 0; plane < buffer->num_planes; ++plane) {
+      if (buffer->surfaces[plane])
+         continue; /* already created by an earlier call */
+      res = buffer->resources[plane];
+      memset(&templ, 0, sizeof(templ));
+      templ.format = res->format;
+      templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+      buffer->surfaces[plane] = ctx->create_surface(ctx, res, &templ);
+      if (!buffer->surfaces[plane])
+         goto error;
+   }
+   return &buffer->surfaces;
+
+error: /* drop every surface, including ones kept from earlier calls */
+   for (plane = 0; plane < buffer->num_planes; ++plane)
+      pipe_surface_reference(&buffer->surfaces[plane], NULL);
+   return NULL;
+}
+
+/* Drop the surface, sampler view and resource reference held in every plane slot. */
+void vl_video_buffer_cleanup(struct vl_video_buffer *buffer)
+{
+   unsigned plane;
+
+   assert(buffer);
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane) {
+      pipe_surface_reference(&buffer->surfaces[plane], NULL);
+      pipe_sampler_view_reference(&buffer->sampler_views[plane], NULL);
+      pipe_resource_reference(&buffer->resources[plane], NULL);
+   }
+}
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
new file mode 100644
index 00000000000..adba6c56e88
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -0,0 +1,84 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_ycbcr_buffer_h
+#define vl_ycbcr_buffer_h
+
+#include <pipe/p_state.h>
+
+#define VL_MAX_PLANES 3
+
+/**
+ * implementation of a planar ycbcr buffer
+ */
+
+/* resources of a buffer */
+typedef struct pipe_resource *vl_resources[VL_MAX_PLANES];
+
+/* sampler views of a buffer */
+typedef struct pipe_sampler_view *vl_sampler_views[VL_MAX_PLANES];
+
+/* surfaces of a buffer */
+typedef struct pipe_surface *vl_surfaces[VL_MAX_PLANES];
+
+/* planar buffer for vl data upload and manipulation */
+struct vl_video_buffer
+{
+   struct pipe_context *pipe;          /* context used to create the views and surfaces */
+   unsigned            num_planes;     /* planes in use, at most VL_MAX_PLANES */
+   vl_resources        resources;      /* one resource per plane, created by init */
+   vl_sampler_views    sampler_views;  /* created on demand, NULL until requested */
+   vl_surfaces         surfaces;       /* created on demand, NULL until requested */
+};
+
+/**
+ * initialize a buffer, creating its resources
+ */
+bool vl_video_buffer_init(struct vl_video_buffer *buffer,
+                          struct pipe_context *pipe,
+                          unsigned width, unsigned height, unsigned depth,
+                          enum pipe_video_chroma_format chroma_format,
+                          unsigned num_planes,
+                          const enum pipe_format resource_formats[VL_MAX_PLANES],
+                          unsigned usage);
+
+/**
+ * create default sampler views for the buffer on demand
+ */
+vl_sampler_views *vl_video_buffer_sampler_views(struct vl_video_buffer *buffer);
+
+/**
+ * create default surfaces for the buffer on demand
+ */
+vl_surfaces *vl_video_buffer_surfaces(struct vl_video_buffer *buffer);
+
+/**
+ * cleanup the buffer destroying all its resources
+ */
+void vl_video_buffer_cleanup(struct vl_video_buffer *buffer);
+
+#endif
diff --git a/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c b/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
deleted file mode 100644
index c67cec69723..00000000000
--- a/src/gallium/auxiliary/vl/vl_ycbcr_buffer.c
+++ /dev/null
@@ -1,195 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2011 Christian König.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "vl_ycbcr_buffer.h"
-#include <util/u_format.h>
-#include <util/u_inlines.h>
-#include <util/u_sampler.h>
-#include <pipe/p_screen.h>
-#include <pipe/p_context.h>
-#include <assert.h>
-
-bool vl_ycbcr_buffer_init(struct vl_ycbcr_buffer *buffer,
-                          struct pipe_context *pipe,
-                          unsigned width, unsigned height,
-                          enum pipe_video_chroma_format chroma_format,
-                          enum pipe_format resource_format,
-                          unsigned usage)
-{
-   struct pipe_resource templ;
-
-   assert(buffer && pipe);
-
-   memset(buffer, 0, sizeof(struct vl_ycbcr_buffer));
-   buffer->pipe = pipe;
-
-   memset(&templ, 0, sizeof(templ));
-   templ.target = PIPE_TEXTURE_2D;
-   templ.format = resource_format;
-   templ.width0 = width / util_format_get_nr_components(resource_format);
-   templ.height0 = height;
-   templ.depth0 = 1;
-   templ.array_size = 1;
-   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   templ.usage = usage;
-
-   buffer->resources.y = pipe->screen->resource_create(pipe->screen, &templ);
-   if (!buffer->resources.y)
-      goto error_resource_y;
-
-   if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      templ.width0 /= 2;
-      templ.height0 /= 2;
-   } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
-      templ.height0 /= 2;
-   }
-
-   buffer->resources.cb = pipe->screen->resource_create(pipe->screen, &templ);
-   if (!buffer->resources.cb)
-      goto error_resource_cb;
-
-   buffer->resources.cr = pipe->screen->resource_create(pipe->screen, &templ);
-   if (!buffer->resources.cr)
-      goto error_resource_cr;
-
-   return true;
-
-error_resource_cr:
-   pipe_resource_reference(&buffer->resources.cb, NULL);
-
-error_resource_cb:
-   pipe_resource_reference(&buffer->resources.y, NULL);
-
-error_resource_y:
-   return false;
-}
-
-struct vl_ycbcr_sampler_views *vl_ycbcr_get_sampler_views(struct vl_ycbcr_buffer *buffer)
-{
-   struct pipe_sampler_view sv_templ;
-   struct pipe_context *pipe;
-
-   assert(buffer);
-
-   pipe = buffer->pipe;
-
-   memset(&sv_templ, 0, sizeof(sv_templ));
-   u_sampler_view_default_template(&sv_templ, buffer->resources.y, buffer->resources.y->format);
-
-   if (util_format_get_nr_components(buffer->resources.y->format) == 1) {
-      sv_templ.swizzle_r = PIPE_SWIZZLE_RED;
-      sv_templ.swizzle_g = PIPE_SWIZZLE_RED;
-      sv_templ.swizzle_b = PIPE_SWIZZLE_RED;
-      sv_templ.swizzle_a = PIPE_SWIZZLE_RED;
-   }
-
-   if (!buffer->sampler_views.y) {
-      buffer->sampler_views.y = pipe->create_sampler_view(pipe, buffer->resources.y, &sv_templ);
-      if (!buffer->sampler_views.y)
-         goto error;
-   }
-
-   if (!buffer->sampler_views.cb) {
-      buffer->sampler_views.cb = pipe->create_sampler_view(pipe, buffer->resources.cb, &sv_templ);
-      if (!buffer->sampler_views.cb)
-         goto error;
-   }
-
-   if (!buffer->sampler_views.cr) {
-      buffer->sampler_views.cr = pipe->create_sampler_view(pipe, buffer->resources.cr, &sv_templ);
-      if (!buffer->sampler_views.cr)
-         goto error;
-   }
-
-   return &buffer->sampler_views;
-
-error:
-   pipe_sampler_view_reference(&buffer->sampler_views.y, NULL);
-   pipe_sampler_view_reference(&buffer->sampler_views.cb, NULL);
-   pipe_sampler_view_reference(&buffer->sampler_views.cr, NULL);
-   return NULL;
-}
-
-struct vl_ycbcr_surfaces *vl_ycbcr_get_surfaces(struct vl_ycbcr_buffer *buffer)
-{
-   struct pipe_surface surf_templ;
-   struct pipe_context *pipe;
-
-   assert(buffer);
-
-   pipe = buffer->pipe;
-
-   if (!buffer->surfaces.y) {
-      memset(&surf_templ, 0, sizeof(surf_templ));
-      surf_templ.format = buffer->resources.y->format;
-      surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-      buffer->surfaces.y = pipe->create_surface(pipe, buffer->resources.y, &surf_templ);
-      if (!buffer->surfaces.y)
-         goto error;
-   }
-
-   if (!buffer->surfaces.cb) {
-      memset(&surf_templ, 0, sizeof(surf_templ));
-      surf_templ.format = buffer->resources.cb->format;
-      surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-      buffer->surfaces.cb = pipe->create_surface(pipe, buffer->resources.cb, &surf_templ);
-      if (!buffer->surfaces.cb)
-         goto error;
-   }
-
-   if (!buffer->surfaces.cr) {
-      memset(&surf_templ, 0, sizeof(surf_templ));
-      surf_templ.format = buffer->resources.cr->format;
-      surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-      buffer->surfaces.cr = pipe->create_surface(pipe, buffer->resources.cr, &surf_templ);
-      if (!buffer->surfaces.cr)
-         goto error;
-   }
-
-   return &buffer->surfaces;
-
-error:
-   pipe_surface_reference(&buffer->surfaces.y, NULL);
-   pipe_surface_reference(&buffer->surfaces.cb, NULL);
-   pipe_surface_reference(&buffer->surfaces.cr, NULL);
-   return NULL;
-}
-
-void vl_ycbcr_buffer_cleanup(struct vl_ycbcr_buffer *buffer)
-{
-   pipe_surface_reference(&buffer->surfaces.y, NULL);
-   pipe_surface_reference(&buffer->surfaces.cb, NULL);
-   pipe_surface_reference(&buffer->surfaces.cr, NULL);
-
-   pipe_sampler_view_reference(&buffer->sampler_views.y, NULL);
-   pipe_sampler_view_reference(&buffer->sampler_views.cb, NULL);
-   pipe_sampler_view_reference(&buffer->sampler_views.cr, NULL);
-
-   pipe_resource_reference(&buffer->resources.y, NULL);
-   pipe_resource_reference(&buffer->resources.cb, NULL);
-   pipe_resource_reference(&buffer->resources.cr, NULL);
-}
diff --git a/src/gallium/auxiliary/vl/vl_ycbcr_buffer.h b/src/gallium/auxiliary/vl/vl_ycbcr_buffer.h
deleted file mode 100644
index a116ed2a882..00000000000
--- a/src/gallium/auxiliary/vl/vl_ycbcr_buffer.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2011 Christian König.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef vl_ycbcr_buffer_h
-#define vl_ycbcr_buffer_h
-
-#include <pipe/p_state.h>
-
-/**
- * implementation of a planar ycbcr buffer
- */
-
-/* resources of a buffer */
-struct vl_ycbcr_resources
-{
-   struct pipe_resource *y, *cb, *cr;
-};
-
-/* sampler views of a buffer */
-struct vl_ycbcr_sampler_views
-{
-   struct pipe_sampler_view *y, *cb, *cr;
-};
-
-/* surfaces of a buffer */
-struct vl_ycbcr_surfaces
-{
-   struct pipe_surface *y, *cb, *cr;
-};
-
-/* planar buffer for vl data upload and manipulation */
-struct vl_ycbcr_buffer
-{
-   struct pipe_context           *pipe;
-   struct vl_ycbcr_resources     resources;
-   struct vl_ycbcr_sampler_views sampler_views;
-   struct vl_ycbcr_surfaces      surfaces;
-};
-
-/**
- * initialize a buffer, creating its resources
- */
-bool vl_ycbcr_buffer_init(struct vl_ycbcr_buffer *buffer,
-                          struct pipe_context *pipe,
-                          unsigned width, unsigned height,
-                          enum pipe_video_chroma_format chroma_format,
-                          enum pipe_format resource_format,
-                          unsigned usage);
-
-/**
- * create default sampler views for the buffer on demand
- */
-struct vl_ycbcr_sampler_views *vl_ycbcr_get_sampler_views(struct vl_ycbcr_buffer *buffer);
-
-/**
- * create default surfaces for the buffer on demand
- */
-struct vl_ycbcr_surfaces *vl_ycbcr_get_surfaces(struct vl_ycbcr_buffer *buffer);
-
-/**
- * cleanup the buffer destroying all its resources
- */
-void vl_ycbcr_buffer_cleanup(struct vl_ycbcr_buffer *buffer);
-
-#endif
-- 
cgit v1.2.3


From d9ad3aa3b9647f1ede2568600978af956ff32fff Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 6 Apr 2011 00:06:20 +0200
Subject: [g3dvl] and finally split the decoder part out of the context

This should give a good basis to implement vdpau on top of it.
---
 src/gallium/auxiliary/Makefile                     |   3 +-
 src/gallium/auxiliary/vl/vl_compositor.c           |   9 +-
 src/gallium/auxiliary/vl/vl_context.c              | 264 ++++++++
 src/gallium/auxiliary/vl/vl_context.h              |  49 ++
 src/gallium/auxiliary/vl/vl_mpeg12_context.c       | 723 ---------------------
 src/gallium/auxiliary/vl/vl_mpeg12_context.h       |  92 ---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c       | 561 ++++++++++++++++
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h       |  90 +++
 src/gallium/auxiliary/vl/vl_video_buffer.c         | 224 ++++---
 src/gallium/auxiliary/vl/vl_video_buffer.h         |  54 +-
 src/gallium/drivers/nv40/nv40_video_context.c      |  16 +-
 src/gallium/drivers/nv40/nv40_video_context.h      |   4 +-
 src/gallium/drivers/nvfx/nvfx_video_context.c      |  16 +-
 src/gallium/drivers/nvfx/nvfx_video_context.h      |   4 +-
 src/gallium/drivers/r600/r600_video_context.c      |  16 +-
 src/gallium/drivers/r600/r600_video_context.h      |   4 +-
 src/gallium/drivers/softpipe/sp_screen.c           |  17 +-
 src/gallium/include/pipe/p_screen.h                |  18 +-
 src/gallium/include/pipe/p_video_context.h         | 150 +++--
 src/gallium/state_trackers/xorg/xvmc/context.c     |  19 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c     |  29 +-
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   5 +-
 src/gallium/winsys/g3dvl/dri/dri_winsys.c          |  10 +-
 src/gallium/winsys/g3dvl/vl_winsys.h               |   5 +-
 24 files changed, 1283 insertions(+), 1099 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_context.c
 create mode 100644 src/gallium/auxiliary/vl/vl_context.h
 delete mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_context.c
 delete mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_context.h
 create mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
 create mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index b4ad059ec90..425ae78138b 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -147,9 +147,10 @@ C_SOURCES = \
 	util/u_resource.c \
 	util/u_upload_mgr.c \
 	util/u_vbuf_mgr.c \
+	vl/vl_context.c \
 	vl/vl_bitstream_parser.c \
 	vl/vl_mpeg12_mc_renderer.c \
-	vl/vl_mpeg12_context.c \
+	vl/vl_mpeg12_decoder.c \
 	vl/vl_compositor.c \
 	vl/vl_csc.c \
         vl/vl_idct.c \
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 45e9cea9f66..46579a88ba7 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -484,13 +484,20 @@ vl_compositor_set_buffer_layer(struct pipe_video_compositor *compositor,
                                struct pipe_video_rect *dst_rect)
 {
    struct vl_compositor *c = (struct vl_compositor *)compositor;
+   struct pipe_sampler_view **sampler_views;
+   unsigned i;
+
    assert(compositor && buffer);
 
    assert(layer < VL_COMPOSITOR_MAX_LAYERS);
 
    c->used_layers |= 1 << layer;
    c->layers[layer].fs = c->fs_video_buffer;
-   buffer->get_sampler_views(buffer, c->layers[layer].sampler_views);
+
+   sampler_views = buffer->get_sampler_views(buffer);
+   for (i = 0; i < 3; ++i)
+      pipe_sampler_view_reference(&c->layers[layer].sampler_views[i], sampler_views[i]);
+
    c->layers[layer].src_rect = src_rect ? *src_rect : default_rect(&c->layers[layer]);
    c->layers[layer].dst_rect = dst_rect ? *dst_rect : default_rect(&c->layers[layer]);
 }
diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
new file mode 100644
index 00000000000..e352475cb8d
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -0,0 +1,264 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <pipe/p_video_context.h>
+
+#include <util/u_memory.h>
+#include <util/u_rect.h>
+#include <util/u_video.h>
+
+#include "vl_context.h"
+#include "vl_compositor.h"
+#include "vl_mpeg12_decoder.h"
+
+/* Destroy the wrapped pipe context, then free the vl_context itself. */
+static void
+vl_context_destroy(struct pipe_video_context *context)
+{
+   struct vl_context *vctx;
+
+   assert(context);
+   vctx = (struct vl_context *)context;
+   vctx->pipe->destroy(vctx->pipe);
+   FREE(vctx);
+}
+
+/* Answer PIPE_CAP queries; only PIPE_CAP_NPOT_TEXTURES is known, anything else yields 0. */
+static int
+vl_context_get_param(struct pipe_video_context *context, int param)
+{
+   struct vl_context *vctx = (struct vl_context *)context;
+
+   assert(context);
+   if (param != PIPE_CAP_NPOT_TEXTURES) {
+      debug_printf("vl_context: Unknown PIPE_CAP %d\n", param);
+      return 0;
+   }
+   return !vctx->pot_buffers;
+}
+
+/* Ask the wrapped pipe's screen about the format, always for PIPE_TEXTURE_2D. */
+static boolean
+vl_context_is_format_supported(struct pipe_video_context *context,
+                               enum pipe_format format,
+                               unsigned usage)
+{
+   struct pipe_screen *screen;
+
+   assert(context);
+   screen = ((struct vl_context *)context)->pipe->screen;
+
+   return screen->is_format_supported(screen, format, PIPE_TEXTURE_2D, 0, usage);
+}
+
+/* Thin wrapper: create the surface through the wrapped pipe context. */
+static struct pipe_surface *
+vl_context_create_surface(struct pipe_video_context *context,
+                          struct pipe_resource *resource,
+                          const struct pipe_surface *templ)
+{
+   struct pipe_context *pipe;
+   assert(context);
+   pipe = ((struct vl_context *)context)->pipe;
+   return pipe->create_surface(pipe, resource, templ);
+}
+
+/* Thin wrapper: create the sampler view through the wrapped pipe context. */
+static struct pipe_sampler_view *
+vl_context_create_sampler_view(struct pipe_video_context *context,
+                               struct pipe_resource *resource,
+                               const struct pipe_sampler_view *templ)
+{
+   struct pipe_context *pipe;
+   assert(context);
+   pipe = ((struct vl_context *)context)->pipe;
+   return pipe->create_sampler_view(pipe, resource, templ);
+}
+
+/* Copy a dst_box-sized rectangle from src into dst's texture; no-op if mapping fails. */
+static void
+vl_context_upload_sampler(struct pipe_video_context *context,
+                          struct pipe_sampler_view *dst,
+                          const struct pipe_box *dst_box,
+                          const void *src, unsigned src_stride,
+                          unsigned src_x, unsigned src_y)
+{
+   struct vl_context *ctx = (struct vl_context*)context;
+   struct pipe_transfer *transfer;
+   void *map;
+
+   assert(context);
+   assert(dst);
+   assert(dst_box);
+   assert(src);
+
+   transfer = ctx->pipe->get_transfer(ctx->pipe, dst->texture, 0, PIPE_TRANSFER_WRITE, dst_box);
+   if (!transfer)
+      return;
+
+   map = ctx->pipe->transfer_map(ctx->pipe, transfer);
+   if (!map) /* check the mapping itself; transfer is already known to be valid */
+      goto error_map;
+
+   util_copy_rect(map, dst->texture->format, transfer->stride, 0, 0,
+                  dst_box->width, dst_box->height,
+                  src, src_stride, src_x, src_y);
+
+   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
+error_map:
+   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
+}
+
+/* Fill the dst_box-sized area of dst's texture with the rgba color; no-op if mapping fails. */
+static void
+vl_context_clear_sampler(struct pipe_video_context *context,
+                         struct pipe_sampler_view *dst,
+                         const struct pipe_box *dst_box,
+                         const float *rgba)
+{
+   struct vl_context *ctx = (struct vl_context*)context;
+   struct pipe_transfer *transfer;
+   union util_color uc;
+   void *map;
+   unsigned i;
+
+   assert(context);
+   assert(dst);
+   assert(dst_box);
+   assert(rgba);
+
+   transfer = ctx->pipe->get_transfer(ctx->pipe, dst->texture, 0, PIPE_TRANSFER_WRITE, dst_box);
+   if (!transfer)
+      return;
+
+   map = ctx->pipe->transfer_map(ctx->pipe, transfer);
+   if (!map) /* check the mapping itself; transfer is already known to be valid */
+      goto error_map;
+
+   for ( i = 0; i < 4; ++i)
+      uc.f[i] = rgba[i];
+
+   util_fill_rect(map, dst->texture->format, transfer->stride, 0, 0,
+                  dst_box->width, dst_box->height, &uc);
+
+   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
+error_map:
+   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
+}
+
+/* Create a decoder for the profile's codec; only PIPE_VIDEO_CODEC_MPEG12 is handled,
+ * anything else yields NULL. Sizes are rounded up to powers of two for POT contexts. */
+static struct pipe_video_decoder *
+vl_context_create_decoder(struct pipe_video_context *context,
+                          enum pipe_video_profile profile,
+                          enum pipe_video_chroma_format chroma_format,
+                          unsigned width, unsigned height)
+{
+   struct vl_context *vctx = (struct vl_context *)context;
+   unsigned dec_width = width, dec_height = height;
+
+   assert(context);
+   assert(width > 0 && height > 0);
+
+   if (vctx->pot_buffers) {
+      dec_width = util_next_power_of_two(width);
+      dec_height = util_next_power_of_two(height);
+   }
+
+   if (u_reduce_video_profile(profile) != PIPE_VIDEO_CODEC_MPEG12)
+      return NULL;
+   return vl_create_mpeg12_decoder(context, vctx->pipe, profile, chroma_format,
+                                   dec_width, dec_height);
+}
+
+/* Create a video buffer made of three R8_SNORM planes. buffer_format is asserted to be
+ * PIPE_FORMAT_YV12; sizes are rounded up to powers of two for POT contexts. */
+static struct pipe_video_buffer *
+vl_context_create_buffer(struct pipe_video_context *context,
+                         enum pipe_format buffer_format,
+                         enum pipe_video_chroma_format chroma_format,
+                         unsigned width, unsigned height)
+{
+   const enum pipe_format resource_formats[3] = {
+      PIPE_FORMAT_R8_SNORM, PIPE_FORMAT_R8_SNORM, PIPE_FORMAT_R8_SNORM
+   };
+
+   struct vl_context *vctx = (struct vl_context *)context;
+   unsigned buf_width = width, buf_height = height;
+
+   assert(context);
+   assert(width > 0 && height > 0);
+   assert(buffer_format == PIPE_FORMAT_YV12);
+
+   if (vctx->pot_buffers) {
+      buf_width = util_next_power_of_two(width);
+      buf_height = util_next_power_of_two(height);
+   }
+
+   return vl_video_buffer_init(context, vctx->pipe, buf_width, buf_height, 1,
+                               chroma_format, 3, resource_formats,
+                               PIPE_USAGE_STATIC);
+}
+
+/* Build a compositor on top of the wrapped pipe context. */
+static struct pipe_video_compositor *
+vl_context_create_compositor(struct pipe_video_context *context)
+{
+   struct vl_context *vctx = (struct vl_context *)context;
+
+   assert(context);
+   return vl_compositor_init(context, vctx->pipe);
+}
+
+/* Allocate a vl_context around pipe and fill in the pipe_video_context entry points.
+ * Returns NULL if the allocation fails. */
+struct pipe_video_context *
+vl_create_context(struct pipe_context *pipe, bool pot_buffers)
+{
+   struct vl_context *vctx = CALLOC_STRUCT(vl_context);
+   struct pipe_video_context *base;
+
+   if (!vctx)
+      return NULL;
+
+   vctx->pipe = pipe;
+   vctx->pot_buffers = pot_buffers;
+
+   base = &vctx->base;
+   base->screen = pipe->screen;
+   base->destroy = vl_context_destroy;
+   base->get_param = vl_context_get_param;
+   base->is_format_supported = vl_context_is_format_supported;
+   base->create_surface = vl_context_create_surface;
+   base->create_sampler_view = vl_context_create_sampler_view;
+   base->clear_sampler = vl_context_clear_sampler;
+   base->upload_sampler = vl_context_upload_sampler;
+   base->create_decoder = vl_context_create_decoder;
+   base->create_buffer = vl_context_create_buffer;
+   base->create_compositor = vl_context_create_compositor;
+   return base;
+}
diff --git a/src/gallium/auxiliary/vl/vl_context.h b/src/gallium/auxiliary/vl/vl_context.h
new file mode 100644
index 00000000000..a4504871e65
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_context.h
@@ -0,0 +1,49 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef VL_CONTEXT_H
+#define VL_CONTEXT_H
+
+#include <pipe/p_video_context.h>
+
+struct pipe_screen;
+struct pipe_context;
+
+struct vl_context
+{
+   struct pipe_video_context base; /* first member: the hooks cast pipe_video_context* to vl_context* */
+   struct pipe_context *pipe;      /* pipe handed to vl_create_context() */
+   bool pot_buffers;               /* when set, create_buffer rounds sizes up to a power of two */
+};
+
+/* drivers can call this function in their pipe_video_context constructors and pass it
+   an accelerated pipe_context along with suitable buffering modes, etc */
+struct pipe_video_context *
+vl_create_context(struct pipe_context *pipe, bool pot_buffers);
+
+#endif /* VL_CONTEXT_H */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.c b/src/gallium/auxiliary/vl/vl_mpeg12_context.c
deleted file mode 100644
index a83c240bcd9..00000000000
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.c
+++ /dev/null
@@ -1,723 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "util/u_inlines.h"
-#include "util/u_memory.h"
-
-#include "vl_mpeg12_context.h"
-#include "vl_defines.h"
-#include <pipe/p_shader_tokens.h>
-#include <util/u_inlines.h>
-#include <util/u_memory.h>
-#include <util/u_keymap.h>
-#include <util/u_rect.h>
-#include <util/u_video.h>
-#include <util/u_surface.h>
-#include <util/u_sampler.h>
-
-static const unsigned const_empty_block_mask_420[3][2][2] = {
-        { { 0x20, 0x10 },  { 0x08, 0x04 } },
-        { { 0x02, 0x02 },  { 0x02, 0x02 } },
-        { { 0x01, 0x01 },  { 0x01, 0x01 } }
-};
-
-static void
-upload_buffer(struct vl_mpeg12_context *ctx,
-              struct vl_mpeg12_buffer *buffer,
-              struct pipe_mpeg12_macroblock *mb)
-{
-   short *blocks;
-   unsigned tb, x, y;
-
-   assert(ctx);
-   assert(buffer);
-   assert(mb);
-
-   blocks = mb->blocks;
-
-   for (y = 0; y < 2; ++y) {
-      for (x = 0; x < 2; ++x, ++tb) {
-         if (mb->cbp & (*ctx->empty_block_mask)[0][y][x]) {
-            vl_idct_add_block(&buffer->idct[0], mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
-            blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
-         }
-      }
-   }
-
-   /* TODO: Implement 422, 444 */
-   assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-
-   for (tb = 1; tb < 3; ++tb) {
-      if (mb->cbp & (*ctx->empty_block_mask)[tb][0][0]) {
-         vl_idct_add_block(&buffer->idct[tb], mb->mbx, mb->mby, blocks);
-         blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
-      }
-   }
-}
-
-static void
-vl_mpeg12_buffer_destroy(struct pipe_video_buffer *buffer)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)buf->base.context;
-   assert(buf && ctx);
-
-   vl_video_buffer_cleanup(&buf->idct_source);
-   vl_video_buffer_cleanup(&buf->idct_2_mc);
-   vl_video_buffer_cleanup(&buf->render_result);
-   vl_vb_cleanup(&buf->vertex_stream);
-   vl_idct_cleanup_buffer(&ctx->idct_y, &buf->idct[0]);
-   vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct[1]);
-   vl_idct_cleanup_buffer(&ctx->idct_c, &buf->idct[2]);
-   vl_mpeg12_mc_cleanup_buffer(&buf->mc[0]);
-   vl_mpeg12_mc_cleanup_buffer(&buf->mc[1]);
-   vl_mpeg12_mc_cleanup_buffer(&buf->mc[2]);
-
-   FREE(buf);
-}
-
-static void
-vl_mpeg12_buffer_map(struct pipe_video_buffer *buffer)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_context *ctx;
-   assert(buf);
-
-   ctx = (struct vl_mpeg12_context *)buf->base.context;
-   assert(ctx);
-
-   vl_vb_map(&buf->vertex_stream, ctx->pipe);
-   vl_idct_map_buffers(&ctx->idct_y, &buf->idct[0]);
-   vl_idct_map_buffers(&ctx->idct_c, &buf->idct[1]);
-   vl_idct_map_buffers(&ctx->idct_c, &buf->idct[2]);
-}
-
-static void
-vl_mpeg12_buffer_add_macroblocks(struct pipe_video_buffer *buffer,
-                                 unsigned num_macroblocks,
-                                 struct pipe_macroblock *macroblocks)
-{
-   struct pipe_mpeg12_macroblock *mpeg12_macroblocks = (struct pipe_mpeg12_macroblock*)macroblocks;
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_context *ctx;
-   unsigned i;
-
-   assert(buf);
-
-   ctx =  (struct vl_mpeg12_context*)buf->base.context;
-   assert(ctx);
-
-   assert(num_macroblocks);
-   assert(macroblocks);
-   assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
-
-   for ( i = 0; i < num_macroblocks; ++i ) {
-      vl_vb_add_block(&buf->vertex_stream, &mpeg12_macroblocks[i], ctx->empty_block_mask);
-      upload_buffer(ctx, buf, &mpeg12_macroblocks[i]);
-   }
-}
-
-static void
-vl_mpeg12_buffer_unmap(struct pipe_video_buffer *buffer)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_context *ctx;
-   assert(buf);
-
-   ctx = (struct vl_mpeg12_context *)buf->base.context;
-   assert(ctx);
-
-   vl_vb_unmap(&buf->vertex_stream, ctx->pipe);
-   vl_idct_unmap_buffers(&ctx->idct_y, &buf->idct[0]);
-   vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct[1]);
-   vl_idct_unmap_buffers(&ctx->idct_c, &buf->idct[2]);
-}
-
-static void
-vl_mpeg12_buffer_flush(struct pipe_video_buffer *buffer,
-                       struct pipe_video_buffer *refs[2],
-                       struct pipe_fence_handle **fence)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
-   struct vl_mpeg12_buffer *past = (struct vl_mpeg12_buffer *)refs[0];
-   struct vl_mpeg12_buffer *future = (struct vl_mpeg12_buffer *)refs[1];
-
-   vl_surfaces *surfaces;
-   vl_sampler_views *sv_past;
-   vl_sampler_views *sv_future;
-
-   struct pipe_sampler_view *sv_refs[2];
-   unsigned ne_start, ne_num, e_start, e_num;
-   struct vl_mpeg12_context *ctx;
-   unsigned i;
-
-   assert(buf);
-
-   ctx = (struct vl_mpeg12_context *)buf->base.context;
-   assert(ctx);
-
-   surfaces = vl_video_buffer_surfaces(&buf->render_result);
-
-   sv_past = past ? vl_video_buffer_sampler_views(&past->render_result) : NULL;
-   sv_future = future ? vl_video_buffer_sampler_views(&future->render_result) : NULL;
-
-   vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
-
-   ctx->pipe->set_vertex_buffers(ctx->pipe, 2, buf->vertex_bufs.all);
-   ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
-
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
-      ctx->pipe->bind_vertex_elements_state(ctx->pipe, ctx->ves[i]);
-      vl_idct_flush(i == 0 ? &ctx->idct_y : &ctx->idct_c, &buf->idct[i], ne_num);
-
-      sv_refs[0] = sv_past ? (*sv_past)[i] : NULL;
-      sv_refs[1] = sv_future ? (*sv_future)[i] : NULL;
-
-      vl_mpeg12_mc_renderer_flush(&ctx->mc, &buf->mc[i], (*surfaces)[i],
-                                  sv_refs, ne_start, ne_num, e_start, e_num, fence);
-   }
-}
-
-static void
-vl_mpeg12_buffer_get_sampler_views(struct pipe_video_buffer *buffer,
-                                   struct pipe_sampler_view *sampler_views[3])
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   vl_sampler_views *samplers;
-   unsigned i;
-
-   assert(buf);
-
-   samplers = vl_video_buffer_sampler_views(&buf->render_result);
-
-   assert(samplers);
-
-   for (i = 0; i < VL_MAX_PLANES; ++i)
-      pipe_sampler_view_reference(&sampler_views[i], (*samplers)[i]);
-}
-
-static void
-vl_mpeg12_destroy(struct pipe_video_context *vpipe)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-
-   /* Asserted in softpipe_delete_fs_state() for some reason */
-   ctx->pipe->bind_vs_state(ctx->pipe, NULL);
-   ctx->pipe->bind_fs_state(ctx->pipe, NULL);
-
-   ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend);
-   ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast);
-   ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
-
-   vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
-   vl_idct_cleanup(&ctx->idct_y);
-   vl_idct_cleanup(&ctx->idct_c);
-   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves[0]);
-   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves[1]);
-   ctx->pipe->delete_vertex_elements_state(ctx->pipe, ctx->ves[2]);
-   pipe_resource_reference(&ctx->quads.buffer, NULL);
-   ctx->pipe->destroy(ctx->pipe);
-
-   FREE(ctx);
-}
-
-static int
-vl_mpeg12_get_param(struct pipe_video_context *vpipe, int param)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-
-   if (param == PIPE_CAP_NPOT_TEXTURES)
-      return !ctx->pot_buffers;
-
-   debug_printf("vl_mpeg12_context: Unknown PIPE_CAP %d\n", param);
-   return 0;
-}
-
-static struct pipe_surface *
-vl_mpeg12_create_surface(struct pipe_video_context *vpipe,
-                         struct pipe_resource *resource,
-                         const struct pipe_surface *templ)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(ctx);
-
-   return ctx->pipe->create_surface(ctx->pipe, resource, templ);
-}
-
-static struct pipe_sampler_view *
-vl_mpeg12_create_sampler_view(struct pipe_video_context *vpipe,
-                              struct pipe_resource *resource,
-                              const struct pipe_sampler_view *templ)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(ctx);
-
-   return ctx->pipe->create_sampler_view(ctx->pipe, resource, templ);
-}
-
-static struct pipe_video_buffer *
-vl_mpeg12_create_buffer(struct pipe_video_context *vpipe)
-{
-   const enum pipe_format idct_source_formats[3] = {
-      PIPE_FORMAT_R16G16B16A16_SNORM,
-      PIPE_FORMAT_R16G16B16A16_SNORM,
-      PIPE_FORMAT_R16G16B16A16_SNORM
-   };
-
-   const enum pipe_format idct_2_mc_formats[3] = {
-      PIPE_FORMAT_R16_SNORM,
-      PIPE_FORMAT_R16_SNORM,
-      PIPE_FORMAT_R16_SNORM
-   };
-
-   const enum pipe_format render_result_formats[3] = {
-      PIPE_FORMAT_R8_SNORM,
-      PIPE_FORMAT_R8_SNORM,
-      PIPE_FORMAT_R8_SNORM
-   };
-
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-   struct vl_mpeg12_buffer *buffer;
-
-   vl_sampler_views *idct_views, *mc_views;
-   vl_surfaces *idct_surfaces;
-
-   assert(ctx);
-
-   buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
-   if (buffer == NULL)
-      return NULL;
-
-   buffer->base.context = vpipe;
-   buffer->base.destroy = vl_mpeg12_buffer_destroy;
-   buffer->base.map = vl_mpeg12_buffer_map;
-   buffer->base.add_macroblocks = vl_mpeg12_buffer_add_macroblocks;
-   buffer->base.unmap = vl_mpeg12_buffer_unmap;
-   buffer->base.flush = vl_mpeg12_buffer_flush;
-   buffer->base.get_sampler_views = vl_mpeg12_buffer_get_sampler_views;
-
-   buffer->vertex_bufs.individual.quad.stride = ctx->quads.stride;
-   buffer->vertex_bufs.individual.quad.buffer_offset = ctx->quads.buffer_offset;
-   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, ctx->quads.buffer);
-
-   buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, ctx->pipe,
-                                                      ctx->buffer_width / MACROBLOCK_WIDTH *
-                                                      ctx->buffer_height / MACROBLOCK_HEIGHT);
-   if (!buffer->vertex_bufs.individual.stream.buffer)
-      goto error_vertex_stream;
-
-   if (!vl_video_buffer_init(&buffer->idct_source, ctx->pipe,
-                             ctx->buffer_width / 4, ctx->buffer_height, 1,
-                             ctx->base.chroma_format, 3,
-                             idct_source_formats,
-                             PIPE_USAGE_STREAM))
-      goto error_idct_source;
-
-   if (!vl_video_buffer_init(&buffer->idct_2_mc, ctx->pipe,
-                             ctx->buffer_width, ctx->buffer_height, 1,
-                             ctx->base.chroma_format, 3,
-                             idct_2_mc_formats,
-                             PIPE_USAGE_STATIC))
-      goto error_idct_2_mc;
-
-   if (!vl_video_buffer_init(&buffer->render_result, ctx->pipe,
-                             ctx->buffer_width, ctx->buffer_height, 1,
-                             ctx->base.chroma_format, 3,
-                             render_result_formats,
-                             PIPE_USAGE_STATIC))
-      goto error_render_result;
-
-   idct_views = vl_video_buffer_sampler_views(&buffer->idct_source);
-   if (!idct_views)
-      goto error_idct_views;
-
-   idct_surfaces = vl_video_buffer_surfaces(&buffer->idct_2_mc);
-   if (!idct_surfaces)
-      goto error_idct_surfaces;
-
-   if (!vl_idct_init_buffer(&ctx->idct_y, &buffer->idct[0],
-                            (*idct_views)[0], (*idct_surfaces)[0]))
-      goto error_idct_y;
-
-   if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct[1],
-                            (*idct_views)[1], (*idct_surfaces)[1]))
-      goto error_idct_cb;
-
-   if (!vl_idct_init_buffer(&ctx->idct_c, &buffer->idct[2],
-                            (*idct_views)[2], (*idct_surfaces)[2]))
-      goto error_idct_cr;
-
-   mc_views = vl_video_buffer_sampler_views(&buffer->idct_2_mc);
-   if (!mc_views)
-      goto error_mc_views;
-
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc[0], (*mc_views)[0]))
-      goto error_mc_y;
-
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc[1], (*mc_views)[1]))
-      goto error_mc_cb;
-
-   if(!vl_mpeg12_mc_init_buffer(&ctx->mc, &buffer->mc[2], (*mc_views)[2]))
-      goto error_mc_cr;
-
-   return &buffer->base;
-
-error_mc_cr:
-   vl_mpeg12_mc_cleanup_buffer(&buffer->mc[1]);
-
-error_mc_cb:
-   vl_mpeg12_mc_cleanup_buffer(&buffer->mc[0]);
-
-error_mc_y:
-error_mc_views:
-   vl_idct_cleanup_buffer(&ctx->idct_c, &buffer->idct[2]);
-
-error_idct_cr:
-   vl_idct_cleanup_buffer(&ctx->idct_c, &buffer->idct[1]);
-
-error_idct_cb:
-   vl_idct_cleanup_buffer(&ctx->idct_y, &buffer->idct[0]);
-
-error_idct_y:
-error_idct_surfaces:
-error_idct_views:
-   vl_video_buffer_cleanup(&buffer->render_result);
-
-error_render_result:
-   vl_video_buffer_cleanup(&buffer->idct_2_mc);
-
-error_idct_2_mc:
-   vl_video_buffer_cleanup(&buffer->idct_source);
-
-error_idct_source:
-   vl_vb_cleanup(&buffer->vertex_stream);
-
-error_vertex_stream:
-   FREE(buffer);
-   return NULL;
-}
-
-static boolean
-vl_mpeg12_is_format_supported(struct pipe_video_context *vpipe,
-                              enum pipe_format format,
-                              unsigned usage)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-
-   return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, format,
-                                                 PIPE_TEXTURE_2D,
-                                                 0, usage);
-}
-
-static void
-vl_mpeg12_clear_sampler(struct pipe_video_context *vpipe,
-                        struct pipe_sampler_view *dst,
-                        const struct pipe_box *dst_box,
-                        const float *rgba)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-   struct pipe_transfer *transfer;
-   union util_color uc;
-   void *map;
-   unsigned i;
-
-   assert(vpipe);
-   assert(dst);
-   assert(dst_box);
-   assert(rgba);
-
-   transfer = ctx->pipe->get_transfer(ctx->pipe, dst->texture, 0, PIPE_TRANSFER_WRITE, dst_box);
-   if (!transfer)
-      return;
-
-   map = ctx->pipe->transfer_map(ctx->pipe, transfer);
-   if (!transfer)
-      goto error_map;
-
-   for ( i = 0; i < 4; ++i)
-      uc.f[i] = rgba[i];
-
-   util_fill_rect(map, dst->texture->format, transfer->stride, 0, 0,
-                  dst_box->width, dst_box->height, &uc);
-
-   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
-
-error_map:
-   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
-}
-
-static void
-vl_mpeg12_upload_sampler(struct pipe_video_context *vpipe,
-                         struct pipe_sampler_view *dst,
-                         const struct pipe_box *dst_box,
-                         const void *src, unsigned src_stride,
-                         unsigned src_x, unsigned src_y)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-   struct pipe_transfer *transfer;
-   void *map;
-
-   assert(vpipe);
-   assert(dst);
-   assert(dst_box);
-   assert(src);
-
-   transfer = ctx->pipe->get_transfer(ctx->pipe, dst->texture, 0, PIPE_TRANSFER_WRITE, dst_box);
-   if (!transfer)
-      return;
-
-   map = ctx->pipe->transfer_map(ctx->pipe, transfer);
-   if (!transfer)
-      goto error_map;
-
-   util_copy_rect(map, dst->texture->format, transfer->stride, 0, 0,
-                  dst_box->width, dst_box->height,
-                  src, src_stride, src_x, src_y);
-
-   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
-
-error_map:
-   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
-}
-
-static struct pipe_video_compositor *
-vl_mpeg12_create_compositor(struct pipe_video_context *vpipe)
-{
-   struct vl_mpeg12_context *ctx = (struct vl_mpeg12_context*)vpipe;
-
-   assert(vpipe);
-
-   return vl_compositor_init(vpipe, ctx->pipe);
-}
-
-static bool
-init_pipe_state(struct vl_mpeg12_context *ctx)
-{
-   struct pipe_rasterizer_state rast;
-   struct pipe_blend_state blend;
-   struct pipe_depth_stencil_alpha_state dsa;
-   unsigned i;
-
-   assert(ctx);
-
-   memset(&rast, 0, sizeof rast);
-   rast.flatshade = 1;
-   rast.flatshade_first = 0;
-   rast.light_twoside = 0;
-   rast.front_ccw = 1;
-   rast.cull_face = PIPE_FACE_NONE;
-   rast.fill_back = PIPE_POLYGON_MODE_FILL;
-   rast.fill_front = PIPE_POLYGON_MODE_FILL;
-   rast.offset_point = 0;
-   rast.offset_line = 0;
-   rast.scissor = 0;
-   rast.poly_smooth = 0;
-   rast.poly_stipple_enable = 0;
-   rast.sprite_coord_enable = 0;
-   rast.point_size_per_vertex = 0;
-   rast.multisample = 0;
-   rast.line_smooth = 0;
-   rast.line_stipple_enable = 0;
-   rast.line_stipple_factor = 0;
-   rast.line_stipple_pattern = 0;
-   rast.line_last_pixel = 0;
-   rast.line_width = 1;
-   rast.point_smooth = 0;
-   rast.point_quad_rasterization = 0;
-   rast.point_size_per_vertex = 1;
-   rast.offset_units = 1;
-   rast.offset_scale = 1;
-   rast.gl_rasterization_rules = 1;
-
-   ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
-   ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
-
-   memset(&blend, 0, sizeof blend);
-
-   blend.independent_blend_enable = 0;
-   blend.rt[0].blend_enable = 0;
-   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
-   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
-   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
-   blend.logicop_enable = 0;
-   blend.logicop_func = PIPE_LOGICOP_CLEAR;
-   /* Needed to allow color writes to FB, even if blending disabled */
-   blend.rt[0].colormask = PIPE_MASK_RGBA;
-   blend.dither = 0;
-   ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
-
-   memset(&dsa, 0, sizeof dsa);
-   dsa.depth.enabled = 0;
-   dsa.depth.writemask = 0;
-   dsa.depth.func = PIPE_FUNC_ALWAYS;
-   for (i = 0; i < 2; ++i) {
-      dsa.stencil[i].enabled = 0;
-      dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
-      dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
-      dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
-      dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
-      dsa.stencil[i].valuemask = 0;
-      dsa.stencil[i].writemask = 0;
-   }
-   dsa.alpha.enabled = 0;
-   dsa.alpha.func = PIPE_FUNC_ALWAYS;
-   dsa.alpha.ref_value = 0;
-   ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa);
-   ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
-
-   return true;
-}
-
-static bool
-init_idct(struct vl_mpeg12_context *ctx, unsigned buffer_width, unsigned buffer_height)
-{
-   unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
-   struct pipe_sampler_view *idct_matrix;
-
-   /* TODO: Implement 422, 444 */
-   assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-   ctx->empty_block_mask = &const_empty_block_mask_420;
-
-   if (!(idct_matrix = vl_idct_upload_matrix(ctx->pipe)))
-      goto error_idct_matrix;
-
-   if (!vl_idct_init(&ctx->idct_y, ctx->pipe, buffer_width, buffer_height,
-                     2, 2, idct_matrix))
-      goto error_idct_y;
-
-   if (ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      chroma_width = buffer_width / 2;
-      chroma_height = buffer_height / 2;
-      chroma_blocks_x = 1;
-      chroma_blocks_y = 1;
-   } else if (ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
-      chroma_width = buffer_width;
-      chroma_height = buffer_height / 2;
-      chroma_blocks_x = 2;
-      chroma_blocks_y = 1;
-   } else {
-      chroma_width = buffer_width;
-      chroma_height = buffer_height;
-      chroma_blocks_x = 2;
-      chroma_blocks_y = 2;
-   }
-
-   if(!vl_idct_init(&ctx->idct_c, ctx->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, idct_matrix))
-      goto error_idct_c;
-
-   pipe_sampler_view_reference(&idct_matrix, NULL);
-   return true;
-
-error_idct_c:
-   vl_idct_cleanup(&ctx->idct_y);
-
-error_idct_y:
-   pipe_sampler_view_reference(&idct_matrix, NULL);
-
-error_idct_matrix:
-   return false;
-}
-
-struct pipe_video_context *
-vl_create_mpeg12_context(struct pipe_context *pipe,
-                         enum pipe_video_profile profile,
-                         enum pipe_video_chroma_format chroma_format,
-                         unsigned width, unsigned height,
-                         bool pot_buffers)
-{
-   struct vl_mpeg12_context *ctx;
-
-   assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
-
-   ctx = CALLOC_STRUCT(vl_mpeg12_context);
-
-   if (!ctx)
-      return NULL;
-
-   ctx->base.profile = profile;
-   ctx->base.chroma_format = chroma_format;
-   ctx->base.width = width;
-   ctx->base.height = height;
-
-   ctx->base.screen = pipe->screen;
-
-   ctx->base.destroy = vl_mpeg12_destroy;
-   ctx->base.get_param = vl_mpeg12_get_param;
-   ctx->base.is_format_supported = vl_mpeg12_is_format_supported;
-   ctx->base.create_surface = vl_mpeg12_create_surface;
-   ctx->base.create_sampler_view = vl_mpeg12_create_sampler_view;
-   ctx->base.create_buffer = vl_mpeg12_create_buffer;
-   ctx->base.clear_sampler = vl_mpeg12_clear_sampler;
-   ctx->base.upload_sampler = vl_mpeg12_upload_sampler;
-   ctx->base.create_compositor = vl_mpeg12_create_compositor;
-
-   ctx->pipe = pipe;
-   ctx->pot_buffers = pot_buffers;
-
-   ctx->quads = vl_vb_upload_quads(ctx->pipe, 2, 2);
-   ctx->ves[0] = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_X);
-   ctx->ves[1] = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Y);
-   ctx->ves[2] = vl_vb_get_elems_state(ctx->pipe, TGSI_SWIZZLE_Z);
-
-   ctx->buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
-   ctx->buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
-
-   if (!init_idct(ctx, ctx->buffer_width, ctx->buffer_height))
-      goto error_idct;
-
-   if (!vl_mpeg12_mc_renderer_init(&ctx->mc, ctx->pipe, ctx->buffer_width, ctx->buffer_height))
-      goto error_mc;
-
-   if (!init_pipe_state(ctx))
-      goto error_pipe_state;
-
-   return &ctx->base;
-
-error_pipe_state:
-   vl_mpeg12_mc_renderer_cleanup(&ctx->mc);
-
-error_mc:
-   vl_idct_cleanup(&ctx->idct_y);
-   vl_idct_cleanup(&ctx->idct_c);
-
-error_idct:
-   ctx->pipe->destroy(ctx->pipe);
-   FREE(ctx);
-   return NULL;
-}
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_context.h b/src/gallium/auxiliary/vl/vl_mpeg12_context.h
deleted file mode 100644
index 94a5dad5571..00000000000
--- a/src/gallium/auxiliary/vl/vl_mpeg12_context.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef VL_MPEG12_CONTEXT_H
-#define VL_MPEG12_CONTEXT_H
-
-#include <pipe/p_video_context.h>
-#include "vl_idct.h"
-#include "vl_mpeg12_mc_renderer.h"
-#include "vl_compositor.h"
-#include "vl_video_buffer.h"
-#include "vl_vertex_buffers.h"
-
-struct pipe_screen;
-struct pipe_context;
-
-struct vl_mpeg12_context
-{
-   struct pipe_video_context base;
-   struct pipe_context *pipe;
-   bool pot_buffers;
-   unsigned buffer_width, buffer_height;
-
-   const unsigned (*empty_block_mask)[3][2][2];
-
-   struct pipe_vertex_buffer quads;
-   void *ves[VL_MAX_PLANES];
-
-   struct vl_idct idct_y, idct_c;
-   struct vl_mpeg12_mc_renderer mc;
-
-   void *rast;
-   void *dsa;
-   void *blend;
-};
-
-struct vl_mpeg12_buffer
-{
-   struct pipe_video_buffer base;
-
-   struct vl_video_buffer idct_source;
-   struct vl_video_buffer idct_2_mc;
-   struct vl_video_buffer render_result;
-
-   struct vl_vertex_buffer vertex_stream;
-
-   union
-   {
-      struct pipe_vertex_buffer all[2];
-      struct {
-         struct pipe_vertex_buffer quad, stream;
-      } individual;
-   } vertex_bufs;
-
-   struct vl_idct_buffer idct[VL_MAX_PLANES];
-   struct vl_mpeg12_mc_buffer mc[VL_MAX_PLANES];
-};
-
-/* drivers can call this function in their pipe_video_context constructors and pass it
-   an accelerated pipe_context along with suitable buffering modes, etc */
-struct pipe_video_context *
-vl_create_mpeg12_context(struct pipe_context *pipe,
-                         enum pipe_video_profile profile,
-                         enum pipe_video_chroma_format chroma_format,
-                         unsigned width, unsigned height,
-                         bool pot_buffers);
-
-#endif /* VL_MPEG12_CONTEXT_H */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
new file mode 100644
index 00000000000..c07b1bb369e
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -0,0 +1,561 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+//#include <pipe/p_shader_tokens.h>
+
+//#include "util/u_inlines.h"
+
+//#include <util/u_inlines.h>
+//#include <util/u_memory.h>
+//#include <util/u_keymap.h>
+
+//#include <util/u_video.h>
+//#include <util/u_surface.h>
+//#include <util/u_sampler.h>
+
+#include <util/u_memory.h>
+#include <util/u_rect.h>
+#include <util/u_video.h>
+
+#include "vl_mpeg12_decoder.h"
+#include "vl_defines.h"
+
+static const unsigned const_empty_block_mask_420[3][2][2] = { /* [plane][y][x] -> bit tested against mb->cbp */
+        { { 0x20, 0x10 },  { 0x08, 0x04 } }, /* plane 0: a separate bit for each of the 2x2 blocks */
+        { { 0x02, 0x02 },  { 0x02, 0x02 } }, /* plane 1: the same bit at every position */
+        { { 0x01, 0x01 },  { 0x01, 0x01 } }  /* plane 2: the same bit at every position */
+};
+
+/* Hand the coded blocks of one macroblock to the IDCT buffers: up to four blocks for
+   plane 0 (2x2 grid), then at most one block each for planes 1 and 2 (4:2:0 only). */
+static void
+upload_buffer(struct vl_mpeg12_decoder *ctx,
+              struct vl_mpeg12_buffer *buffer,
+              struct pipe_mpeg12_macroblock *mb)
+{
+   short *blocks;
+   unsigned tb, x, y;
+
+   assert(ctx);
+   assert(buffer);
+   assert(mb);
+   /* blocks only advances past blocks whose bit is set in mb->cbp */
+   blocks = mb->blocks;
+   for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+         if (mb->cbp & (*ctx->empty_block_mask)[0][y][x]) {
+            vl_idct_add_block(&buffer->idct[0], mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
+            blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
+         }
+      }
+   }
+
+   /* TODO: Implement 422, 444 */
+   assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+   for (tb = 1; tb < 3; ++tb) {
+      if (mb->cbp & (*ctx->empty_block_mask)[tb][0][0]) {
+         vl_idct_add_block(&buffer->idct[tb], mb->mbx, mb->mby, blocks);
+         blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
+      }
+   }
+}
+
+/* Undo vl_mpeg12_create_buffer: destroy the video buffers, drop the quad buffer reference, free buf. */
+static void
+vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
+   assert(buf && dec);
+   buf->idct_source->destroy(buf->idct_source);
+   buf->idct_2_mc->destroy(buf->idct_2_mc);
+   vl_vb_cleanup(&buf->vertex_stream);
+   pipe_resource_reference(&buf->vertex_bufs.individual.quad.buffer, NULL);
+   vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
+   vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
+   vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
+   vl_mpeg12_mc_cleanup_buffer(&buf->mc[0]);
+   vl_mpeg12_mc_cleanup_buffer(&buf->mc[1]);
+   vl_mpeg12_mc_cleanup_buffer(&buf->mc[2]);
+   FREE(buf);
+}
+
+/* Map the vertex stream and the three per-plane IDCT buffers of a decode buffer. */
+static void
+vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *mpeg_buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *decoder;
+   unsigned plane;
+
+   assert(mpeg_buf);
+   decoder = (struct vl_mpeg12_decoder *)mpeg_buf->base.decoder;
+   assert(decoder);
+   vl_vb_map(&mpeg_buf->vertex_stream, decoder->pipe);
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane) /* plane 0 uses idct_y, planes 1 and 2 idct_c */
+      vl_idct_map_buffers(plane ? &decoder->idct_c : &decoder->idct_y, &mpeg_buf->idct[plane]);
+}
+
+/* Append macroblocks to a decode buffer: each goes to the vertex stream and then to upload_buffer. */
+static void
+vl_mpeg12_buffer_add_macroblocks(struct pipe_video_decode_buffer *buffer,
+                                 unsigned num_macroblocks,
+                                 struct pipe_macroblock *macroblocks)
+{
+   struct vl_mpeg12_buffer *mpeg_buf = (struct vl_mpeg12_buffer*)buffer;
+   struct pipe_mpeg12_macroblock *cur = (struct pipe_mpeg12_macroblock*)macroblocks;
+   struct vl_mpeg12_decoder *decoder;
+
+   assert(mpeg_buf);
+   decoder = (struct vl_mpeg12_decoder*)mpeg_buf->base.decoder;
+   assert(decoder);
+
+   assert(num_macroblocks);
+   assert(macroblocks);
+   assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
+
+   /* the generic array is walked as pipe_mpeg12_macroblock entries, per the codec assert above */
+   for (; num_macroblocks > 0; --num_macroblocks, ++cur) {
+      vl_vb_add_block(&mpeg_buf->vertex_stream, cur, decoder->empty_block_mask);
+      upload_buffer(decoder, mpeg_buf, cur);
+   }
+}
+
+/* Unmap the vertex stream and the three per-plane IDCT buffers of a decode buffer. */
+static void
+vl_mpeg12_buffer_unmap(struct pipe_video_decode_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *mpeg_buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *decoder;
+   unsigned plane;
+
+   assert(mpeg_buf);
+   decoder = (struct vl_mpeg12_decoder *)mpeg_buf->base.decoder;
+   assert(decoder);
+   vl_vb_unmap(&mpeg_buf->vertex_stream, decoder->pipe);
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane) /* plane 0 uses idct_y, planes 1 and 2 idct_c */
+      vl_idct_unmap_buffers(plane ? &decoder->idct_c : &decoder->idct_y, &mpeg_buf->idct[plane]);
+}
+
+/* Destroy a decoder: unbind the shaders, delete its state objects and helpers, then free it. */
+static void
+vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
+{
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
+   struct pipe_context *pipe;
+   unsigned plane;
+
+   assert(decoder);
+   pipe = dec->pipe;
+   /* Asserted in softpipe_delete_fs_state() for some reason */
+   pipe->bind_vs_state(pipe, NULL);
+   pipe->bind_fs_state(pipe, NULL);
+
+   pipe->delete_blend_state(pipe, dec->blend);
+   pipe->delete_rasterizer_state(pipe, dec->rast);
+   pipe->delete_depth_stencil_alpha_state(pipe, dec->dsa);
+   vl_mpeg12_mc_renderer_cleanup(&dec->mc);
+   vl_idct_cleanup(&dec->idct_y);
+   vl_idct_cleanup(&dec->idct_c);
+   for (plane = 0; plane < VL_MAX_PLANES; ++plane)
+      pipe->delete_vertex_elements_state(pipe, dec->ves[plane]);
+   pipe_resource_reference(&dec->quads.buffer, NULL);
+   FREE(dec);
+}
+
+/* Create a decode buffer for the decoder: vertex stream, IDCT source and IDCT-to-MC video
+   buffers plus per-plane IDCT and MC state. Returns NULL after undoing any partial setup. */
+static struct pipe_video_decode_buffer *
+vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
+{
+   const enum pipe_format idct_source_formats[3] = {
+      PIPE_FORMAT_R16G16B16A16_SNORM,
+      PIPE_FORMAT_R16G16B16A16_SNORM,
+      PIPE_FORMAT_R16G16B16A16_SNORM
+   };
+   const enum pipe_format idct_2_mc_formats[3] = {
+      PIPE_FORMAT_R16_SNORM,
+      PIPE_FORMAT_R16_SNORM,
+      PIPE_FORMAT_R16_SNORM
+   };
+
+   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
+   struct vl_mpeg12_buffer *buffer;
+   struct pipe_sampler_view **idct_views, **mc_views;
+   struct pipe_surface **idct_surfaces;
+
+   assert(dec);
+   buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
+   if (buffer == NULL)
+      return NULL;
+
+   buffer->base.decoder = decoder;
+   buffer->base.destroy = vl_mpeg12_buffer_destroy;
+   buffer->base.map = vl_mpeg12_buffer_map;
+   buffer->base.add_macroblocks = vl_mpeg12_buffer_add_macroblocks;
+   buffer->base.unmap = vl_mpeg12_buffer_unmap;
+   /* share the decoder's quad buffer; this reference is dropped again on every failure path */
+   buffer->vertex_bufs.individual.quad.stride = dec->quads.stride;
+   buffer->vertex_bufs.individual.quad.buffer_offset = dec->quads.buffer_offset;
+   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, dec->quads.buffer);
+
+   buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, dec->pipe,
+                                                      dec->base.width / MACROBLOCK_WIDTH *
+                                                      dec->base.height / MACROBLOCK_HEIGHT);
+   if (!buffer->vertex_bufs.individual.stream.buffer)
+      goto error_vertex_stream;
+
+   buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                              dec->base.width / 4, dec->base.height, 1,
+                                              dec->base.chroma_format, 3,
+                                              idct_source_formats,
+                                              PIPE_USAGE_STREAM);
+   if (!buffer->idct_source)
+      goto error_idct_source;
+
+   buffer->idct_2_mc = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                            dec->base.width, dec->base.height, 1,
+                                            dec->base.chroma_format, 3,
+                                            idct_2_mc_formats,
+                                            PIPE_USAGE_STATIC);
+   if (!buffer->idct_2_mc)
+      goto error_idct_2_mc;
+   /* each IDCT buffer gets a sampler view of idct_source and a surface of idct_2_mc */
+   idct_views = buffer->idct_source->get_sampler_views(buffer->idct_source);
+   if (!idct_views)
+      goto error_idct_views;
+
+   idct_surfaces = buffer->idct_2_mc->get_surfaces(buffer->idct_2_mc);
+   if (!idct_surfaces)
+      goto error_idct_surfaces;
+
+   if (!vl_idct_init_buffer(&dec->idct_y, &buffer->idct[0],
+                            idct_views[0], idct_surfaces[0]))
+      goto error_idct_y;
+
+   if (!vl_idct_init_buffer(&dec->idct_c, &buffer->idct[1],
+                            idct_views[1], idct_surfaces[1]))
+      goto error_idct_cb;
+
+   if (!vl_idct_init_buffer(&dec->idct_c, &buffer->idct[2],
+                            idct_views[2], idct_surfaces[2]))
+      goto error_idct_cr;
+   /* each MC buffer gets a sampler view of idct_2_mc */
+   mc_views = buffer->idct_2_mc->get_sampler_views(buffer->idct_2_mc);
+   if (!mc_views)
+      goto error_mc_views;
+
+   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[0], mc_views[0]))
+      goto error_mc_y;
+
+   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[1], mc_views[1]))
+      goto error_mc_cb;
+
+   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[2], mc_views[2]))
+      goto error_mc_cr;
+
+   return &buffer->base;
+
+error_mc_cr:
+   vl_mpeg12_mc_cleanup_buffer(&buffer->mc[1]);
+
+error_mc_cb:
+   vl_mpeg12_mc_cleanup_buffer(&buffer->mc[0]);
+
+error_mc_y:
+error_mc_views:
+   vl_idct_cleanup_buffer(&dec->idct_c, &buffer->idct[2]);
+
+error_idct_cr:
+   vl_idct_cleanup_buffer(&dec->idct_c, &buffer->idct[1]);
+
+error_idct_cb:
+   vl_idct_cleanup_buffer(&dec->idct_y, &buffer->idct[0]);
+
+error_idct_y:
+error_idct_surfaces:
+error_idct_views:
+   buffer->idct_2_mc->destroy(buffer->idct_2_mc);
+
+error_idct_2_mc:
+   buffer->idct_source->destroy(buffer->idct_source);
+
+error_idct_source:
+   vl_vb_cleanup(&buffer->vertex_stream);
+
+error_vertex_stream:
+   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, NULL);
+   FREE(buffer);
+   return NULL;
+}
+
+/* Flush everything queued in the buffer: for each plane run the IDCT and then the MC renderer
+   into dst's surface; refs[0]/refs[1] are the optional past/future reference buffers. */
+static void
+vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
+                               struct pipe_video_buffer *refs[2],
+                               struct pipe_video_buffer *dst,
+                               struct pipe_fence_handle **fence)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
+   struct vl_mpeg12_decoder *dec;
+   struct pipe_sampler_view **sv_past;
+   struct pipe_sampler_view **sv_future;
+   struct pipe_surface **surfaces;
+
+   struct pipe_sampler_view *sv_refs[2];
+   unsigned ne_start, ne_num, e_start, e_num;
+   unsigned i;
+
+   assert(buf);
+   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
+   assert(dec);
+   /* a missing reference buffer leaves its sampler view array NULL */
+   sv_past = refs[0] ? refs[0]->get_sampler_views(refs[0]) : NULL;
+   sv_future = refs[1] ? refs[1]->get_sampler_views(refs[1]) : NULL;
+
+   surfaces = dst->get_surfaces(dst);
+
+   vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
+   /* quad and stream vertex buffers are passed together through the union's array view */
+   dec->pipe->set_vertex_buffers(dec->pipe, 2, buf->vertex_bufs.all);
+   dec->pipe->bind_blend_state(dec->pipe, dec->blend);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves[i]);
+      vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], ne_num);
+      /* per-plane reference views, NULL where the reference buffer is absent */
+      sv_refs[0] = sv_past ? sv_past[i] : NULL;
+      sv_refs[1] = sv_future ? sv_future[i] : NULL;
+
+      vl_mpeg12_mc_renderer_flush(&dec->mc, &buf->mc[i], surfaces[i], sv_refs,
+                                  ne_start, ne_num, e_start, e_num, fence);
+   }
+}
+
+/* Restart the buffer's vertex stream, ignoring the values vl_vb_restart writes back. */
+static void
+vl_mpeg12_decoder_clear_buffer(struct pipe_video_decode_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *mpeg_buf = (struct vl_mpeg12_buffer *)buffer;
+   unsigned unused[4]; /* receives the four outputs of vl_vb_restart; not needed here */
+
+   assert(mpeg_buf);
+   vl_vb_restart(&mpeg_buf->vertex_stream, &unused[0], &unused[1], &unused[2], &unused[3]);
+}
+
+/* Create the rasterizer, blend and depth/stencil/alpha state objects of the decoder; always returns true. */
+static bool
+init_pipe_state(struct vl_mpeg12_decoder *dec)
+{
+   struct pipe_rasterizer_state rast;
+   struct pipe_blend_state blend;
+   struct pipe_depth_stencil_alpha_state dsa;
+   unsigned i;
+
+   assert(dec);
+
+   memset(&rast, 0, sizeof rast);
+   rast.flatshade = 1;
+   rast.flatshade_first = 0;
+   rast.light_twoside = 0;
+   rast.front_ccw = 1;
+   rast.cull_face = PIPE_FACE_NONE;
+   rast.fill_back = PIPE_POLYGON_MODE_FILL;
+   rast.fill_front = PIPE_POLYGON_MODE_FILL;
+   rast.offset_point = 0;
+   rast.offset_line = 0;
+   rast.scissor = 0;
+   rast.poly_smooth = 0;
+   rast.poly_stipple_enable = 0;
+   rast.sprite_coord_enable = 0;
+   rast.multisample = 0;
+   rast.line_smooth = 0;
+   rast.line_stipple_enable = 0;
+   rast.line_stipple_factor = 0;
+   rast.line_stipple_pattern = 0;
+   rast.line_last_pixel = 0;
+   rast.line_width = 1;
+   rast.point_smooth = 0;
+   rast.point_quad_rasterization = 0;
+   rast.point_size_per_vertex = 1;
+   rast.offset_units = 1;
+   rast.offset_scale = 1;
+   rast.gl_rasterization_rules = 1;
+
+   dec->rast = dec->pipe->create_rasterizer_state(dec->pipe, &rast);
+   dec->pipe->bind_rasterizer_state(dec->pipe, dec->rast);
+
+   memset(&blend, 0, sizeof blend);
+   /* the blend state is only created here; vl_mpeg12_decoder_flush_buffer binds it */
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 0;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   /* Needed to allow color writes to FB, even if blending disabled */
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   dec->blend = dec->pipe->create_blend_state(dec->pipe, &blend);
+
+   memset(&dsa, 0, sizeof dsa);
+   dsa.depth.enabled = 0;
+   dsa.depth.writemask = 0;
+   dsa.depth.func = PIPE_FUNC_ALWAYS;
+   for (i = 0; i < 2; ++i) {
+      dsa.stencil[i].enabled = 0;
+      dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
+      dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
+      dsa.stencil[i].valuemask = 0;
+      dsa.stencil[i].writemask = 0;
+   }
+   dsa.alpha.enabled = 0;
+   dsa.alpha.func = PIPE_FUNC_ALWAYS;
+   dsa.alpha.ref_value = 0;
+   dec->dsa = dec->pipe->create_depth_stencil_alpha_state(dec->pipe, &dsa);
+   dec->pipe->bind_depth_stencil_alpha_state(dec->pipe, dec->dsa);
+
+   return true;
+}
+
+/* Initialize the two IDCT stages of the decoder, idct_y and idct_c, for the given
+   buffer size. Returns false on failure; idct_y is cleaned up again when the
+   idct_c stage cannot be initialized. */
+static bool
+init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_height)
+{
+   struct pipe_sampler_view *matrix;
+   unsigned chroma_w, chroma_h, blocks_x, blocks_y;
+   bool halve_width, halve_height;
+   bool ok;
+
+   /* TODO: Implement 422, 444 */
+   assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+   dec->empty_block_mask = &const_empty_block_mask_420;
+
+   matrix = vl_idct_upload_matrix(dec->pipe);
+   if (!matrix)
+      return false;
+
+   /* full-size stage, passing 2 for both block-count arguments */
+   ok = vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height,
+                     2, 2, matrix);
+   if (!ok)
+      goto release_matrix;
+
+   /*
+    * 4:2:0 halves both chroma dimensions, 4:2:2 only the height here; every
+    * other format keeps the full size. A halved dimension gets a block count
+    * of 1 instead of 2.
+    */
+   halve_width = dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420;
+   halve_height = halve_width ||
+                  dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422;
+
+   chroma_w = halve_width ? buffer_width / 2 : buffer_width;
+   chroma_h = halve_height ? buffer_height / 2 : buffer_height;
+   blocks_x = halve_width ? 1 : 2;
+   blocks_y = halve_height ? 1 : 2;
+
+   ok = vl_idct_init(&dec->idct_c, dec->pipe, chroma_w, chroma_h,
+                     blocks_x, blocks_y, matrix);
+   if (!ok)
+      vl_idct_cleanup(&dec->idct_y);
+
+release_matrix:
+   /* the local matrix reference is dropped on success and failure alike */
+   pipe_sampler_view_reference(&matrix, NULL);
+
+   return ok;
+}
+
+/* Create an MPEG-1/2 decoder on top of pipe for the given profile, chroma format and frame size.
+   Returns NULL on failure, after releasing the quad buffer and vertex element states again. */
+struct pipe_video_decoder *
+vl_create_mpeg12_decoder(struct pipe_video_context *context,
+                         struct pipe_context *pipe,
+                         enum pipe_video_profile profile,
+                         enum pipe_video_chroma_format chroma_format,
+                         unsigned width, unsigned height)
+{
+   struct vl_mpeg12_decoder *dec;
+   unsigned i;
+
+   assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
+
+   dec = CALLOC_STRUCT(vl_mpeg12_decoder);
+   if (!dec)
+      return NULL;
+
+   dec->base.context = context;
+   dec->base.profile = profile;
+   dec->base.chroma_format = chroma_format;
+   /* the stored size is aligned to MACROBLOCK_WIDTH x MACROBLOCK_HEIGHT */
+   dec->base.width = align(width, MACROBLOCK_WIDTH);
+   dec->base.height = align(height, MACROBLOCK_HEIGHT);
+
+   dec->base.destroy = vl_mpeg12_destroy;
+   dec->base.create_buffer = vl_mpeg12_create_buffer;
+   dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
+   dec->base.clear_buffer = vl_mpeg12_decoder_clear_buffer;
+   dec->pipe = pipe;
+   dec->quads = vl_vb_upload_quads(dec->pipe, 2, 2);
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      dec->ves[i] = vl_vb_get_elems_state(dec->pipe, i);
+
+   if (!init_idct(dec, dec->base.width, dec->base.height))
+      goto error_idct;
+
+   if (!vl_mpeg12_mc_renderer_init(&dec->mc, dec->pipe, dec->base.width, dec->base.height))
+      goto error_mc;
+
+   if (!init_pipe_state(dec))
+      goto error_pipe_state;
+
+   return &dec->base;
+
+error_pipe_state:
+   vl_mpeg12_mc_renderer_cleanup(&dec->mc);
+
+error_mc:
+   vl_idct_cleanup(&dec->idct_y);
+   vl_idct_cleanup(&dec->idct_c);
+
+error_idct:
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves[i]);
+   pipe_resource_reference(&dec->quads.buffer, NULL);
+   FREE(dec);
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
new file mode 100644
index 00000000000..f7dc2d5799a
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -0,0 +1,90 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef VL_MPEG12_DECODER_H
+#define VL_MPEG12_DECODER_H
+
+#include <pipe/p_video_context.h>
+
+#include "vl_idct.h"
+#include "vl_mpeg12_mc_renderer.h"
+
+#include "vl_vertex_buffers.h"
+#include "vl_video_buffer.h"
+
+struct pipe_screen;
+struct pipe_context;
+
+/* MPEG-1/2 decoder state; base comes first so a pipe_video_decoder pointer can be cast back. */
+struct vl_mpeg12_decoder
+{
+   struct pipe_video_decoder base;
+   struct pipe_context *pipe; /* context every state and cleanup call goes through */
+   const unsigned (*empty_block_mask)[3][2][2]; /* cbp bit table, set by init_idct */
+
+   struct pipe_vertex_buffer quads; /* result of vl_vb_upload_quads, referenced by each buffer */
+   void *ves[VL_MAX_PLANES]; /* vertex element state per plane */
+
+   struct vl_idct idct_y, idct_c; /* idct_y serves plane 0, idct_c planes 1 and 2 */
+   struct vl_mpeg12_mc_renderer mc;
+
+   void *rast; /* rasterizer state created by init_pipe_state */
+   void *dsa; /* depth/stencil/alpha state created by init_pipe_state */
+   void *blend; /* blend state created by init_pipe_state */
+};
+
+/* One decode buffer; base comes first so a pipe_video_decode_buffer pointer can be cast back. */
+struct vl_mpeg12_buffer
+{
+   struct pipe_video_decode_buffer base;
+   struct vl_vertex_buffer vertex_stream;
+
+   struct pipe_video_buffer *idct_source; /* its sampler views are handed to the IDCT buffers */
+   struct pipe_video_buffer *idct_2_mc; /* its surfaces go to the IDCT buffers, its sampler views to MC */
+
+   union /* "all" lets quad and stream be passed to set_vertex_buffers as one array */
+   {
+      struct pipe_vertex_buffer all[2];
+      struct {
+         struct pipe_vertex_buffer quad, stream;
+      } individual;
+   } vertex_bufs;
+
+   struct vl_idct_buffer idct[VL_MAX_PLANES];
+   struct vl_mpeg12_mc_buffer mc[VL_MAX_PLANES];
+};
+
+/* drivers can call this function to create an MPEG-1/2 decoder for their pipe_video_context,
+   passing an accelerated pipe_context along with the profile, chroma format and frame size */
+struct pipe_video_decoder *
+vl_create_mpeg12_decoder(struct pipe_video_context *context,
+                         struct pipe_context *pipe,
+                         enum pipe_video_profile profile,
+                         enum pipe_video_chroma_format chroma_format,
+                         unsigned width, unsigned height);
+
+#endif /* VL_MPEG12_DECODER_H */
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index f0b3d192eb5..5ea0dfa3736 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -25,85 +25,18 @@
  *
  **************************************************************************/
 
-#include "vl_video_buffer.h"
-#include <util/u_format.h>
-#include <util/u_inlines.h>
-#include <util/u_sampler.h>
-#include <pipe/p_screen.h>
-#include <pipe/p_context.h>
 #include <assert.h>
 
-bool vl_video_buffer_init(struct vl_video_buffer *buffer,
-                          struct pipe_context *pipe,
-                          unsigned width, unsigned height, unsigned depth,
-                          enum pipe_video_chroma_format chroma_format,
-                          unsigned num_planes,
-                          const enum pipe_format resource_format[VL_MAX_PLANES],
-                          unsigned usage)
-{
-   struct pipe_resource templ;
-   unsigned i;
-
-   assert(buffer && pipe);
-   assert(num_planes > 0 && num_planes <= VL_MAX_PLANES);
-
-   memset(buffer, 0, sizeof(struct vl_video_buffer));
-   buffer->pipe = pipe;
-   buffer->num_planes = num_planes;
-
-   memset(&templ, 0, sizeof(templ));
-   templ.target = PIPE_TEXTURE_2D;
-   templ.format = resource_format[0];
-   templ.width0 = width;
-   templ.height0 = height;
-   templ.depth0 = depth;
-   templ.array_size = 1;
-   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   templ.usage = usage;
-
-   buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ);
-   if (!buffer->resources[0])
-      goto error;
-
-   if (num_planes == 1) {
-      assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444);
-      return true;
-   }
-
-   templ.format = resource_format[1];
-   if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      if (depth > 1)
-         templ.depth0 /= 2;
-      else
-         templ.width0 /= 2;
-      templ.height0 /= 2;
-   } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
-      if (depth > 1)
-         templ.depth0 /= 2;
-      else
-         templ.height0 /= 2;
-   }
-
-   buffer->resources[1] = pipe->screen->resource_create(pipe->screen, &templ);
-   if (!buffer->resources[1])
-      goto error;
-
-   if (num_planes == 2)
-      return true;
-
-   templ.format = resource_format[2];
-   buffer->resources[2] = pipe->screen->resource_create(pipe->screen, &templ);
-   if (!buffer->resources[2])
-      goto error;
-
-   return true;
+#include <pipe/p_screen.h>
+#include <pipe/p_context.h>
+#include <pipe/p_state.h>
 
-error:
-   for (i = 0; i < VL_MAX_PLANES; ++i)
-      pipe_resource_reference(&buffer->resources[i], NULL);
+#include <util/u_format.h>
+#include <util/u_inlines.h>
+#include <util/u_sampler.h>
+#include <util/u_memory.h>
 
-   return false;
-}
+#include "vl_video_buffer.h"
 
 static inline void
 adjust_swizzle(struct pipe_sampler_view *sv_templ)
@@ -116,75 +49,160 @@ adjust_swizzle(struct pipe_sampler_view *sv_templ)
    }
 }
 
-vl_sampler_views *vl_video_buffer_sampler_views(struct vl_video_buffer *buffer)
+/* Drop every per-plane reference the buffer holds, then free the buffer itself. */
+static void
+vl_video_buffer_destroy(struct pipe_video_buffer *buffer)
+{
+   struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer;
+   unsigned i;
+   assert(buf);
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      pipe_surface_reference(&buf->surfaces[i], NULL);
+      pipe_sampler_view_reference(&buf->sampler_views[i], NULL);
+      pipe_resource_reference(&buf->resources[i], NULL);
+   }
+   FREE(buf); /* allocated with CALLOC_STRUCT in vl_video_buffer_init */
+}
+
+static struct pipe_sampler_view **
+vl_video_buffer_sampler_views(struct pipe_video_buffer *buffer)
 {
+   struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer;
    struct pipe_sampler_view sv_templ;
    struct pipe_context *pipe;
    unsigned i;
 
-   assert(buffer);
+   assert(buf);
 
-   pipe = buffer->pipe;
+   pipe = buf->pipe;
 
-   for (i = 0; i < buffer->num_planes; ++i ) {
-      if (!buffer->sampler_views[i]) {
+   for (i = 0; i < buf->num_planes; ++i ) {
+      if (!buf->sampler_views[i]) {
          memset(&sv_templ, 0, sizeof(sv_templ));
-         u_sampler_view_default_template(&sv_templ, buffer->resources[i], buffer->resources[i]->format);
+         u_sampler_view_default_template(&sv_templ, buf->resources[i], buf->resources[i]->format);
          adjust_swizzle(&sv_templ);
-         buffer->sampler_views[i] = pipe->create_sampler_view(pipe, buffer->resources[i], &sv_templ);
-         if (!buffer->sampler_views[i])
+         buf->sampler_views[i] = pipe->create_sampler_view(pipe, buf->resources[i], &sv_templ);
+         if (!buf->sampler_views[i])
             goto error;
       }
    }
 
-   return &buffer->sampler_views;
+   return buf->sampler_views;
 
 error:
-   for (i = 0; i < buffer->num_planes; ++i )
-      pipe_sampler_view_reference(&buffer->sampler_views[i], NULL);
+   for (i = 0; i < buf->num_planes; ++i )
+      pipe_sampler_view_reference(&buf->sampler_views[i], NULL);
 
    return NULL;
 }
 
-vl_surfaces *vl_video_buffer_surfaces(struct vl_video_buffer *buffer)
+static struct pipe_surface **
+vl_video_buffer_surfaces(struct pipe_video_buffer *buffer)
 {
+   struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer;
    struct pipe_surface surf_templ;
    struct pipe_context *pipe;
    unsigned i;
 
-   assert(buffer);
+   assert(buf);
 
-   pipe = buffer->pipe;
+   pipe = buf->pipe;
 
-   for (i = 0; i < buffer->num_planes; ++i ) {
-      if (!buffer->surfaces[i]) {
+   for (i = 0; i < buf->num_planes; ++i ) {
+      if (!buf->surfaces[i]) {
          memset(&surf_templ, 0, sizeof(surf_templ));
-         surf_templ.format = buffer->resources[i]->format;
+         surf_templ.format = buf->resources[i]->format;
          surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-         buffer->surfaces[i] = pipe->create_surface(pipe, buffer->resources[i], &surf_templ);
-         if (!buffer->surfaces[i])
+         buf->surfaces[i] = pipe->create_surface(pipe, buf->resources[i], &surf_templ);
+         if (!buf->surfaces[i])
             goto error;
       }
    }
 
-   return &buffer->surfaces;
+   return buf->surfaces;
 
 error:
-   for (i = 0; i < buffer->num_planes; ++i )
-      pipe_surface_reference(&buffer->surfaces[i], NULL);
+   for (i = 0; i < buf->num_planes; ++i )
+      pipe_surface_reference(&buf->surfaces[i], NULL);
 
    return NULL;
 }
 
-void vl_video_buffer_cleanup(struct vl_video_buffer *buffer)
+struct pipe_video_buffer *
+vl_video_buffer_init(struct pipe_video_context *context,
+                     struct pipe_context *pipe,
+                     unsigned width, unsigned height, unsigned depth,
+                     enum pipe_video_chroma_format chroma_format,
+                     unsigned num_planes,
+                     const enum pipe_format resource_formats[VL_MAX_PLANES],
+                     unsigned usage)
 {
+   struct vl_video_buffer *buffer;
+   struct pipe_resource templ;
    unsigned i;
 
-   assert(buffer);
+   assert(context && pipe);
+   assert(num_planes > 0 && num_planes <= VL_MAX_PLANES);
+
+   buffer = CALLOC_STRUCT(vl_video_buffer);
 
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
-      pipe_surface_reference(&buffer->surfaces[i], NULL);
-      pipe_sampler_view_reference(&buffer->sampler_views[i], NULL);
-      pipe_resource_reference(&buffer->resources[i], NULL);
+   buffer->base.destroy = vl_video_buffer_destroy;
+   buffer->base.get_sampler_views = vl_video_buffer_sampler_views;
+   buffer->base.get_surfaces = vl_video_buffer_surfaces;
+   buffer->pipe = pipe;
+   buffer->num_planes = num_planes;
+
+   memset(&templ, 0, sizeof(templ));
+   templ.target = PIPE_TEXTURE_2D;
+   templ.format = resource_formats[0];
+   templ.width0 = width;
+   templ.height0 = height;
+   templ.depth0 = depth;
+   templ.array_size = 1;
+   templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   templ.usage = usage;
+
+   buffer->resources[0] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[0])
+      goto error;
+
+   if (num_planes == 1) {
+      assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444);
+      return &buffer->base;
+   }
+
+   templ.format = resource_formats[1];
+   if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      if (depth > 1)
+         templ.depth0 /= 2;
+      else
+         templ.width0 /= 2;
+      templ.height0 /= 2;
+   } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      if (depth > 1)
+         templ.depth0 /= 2;
+      else
+         templ.height0 /= 2;
    }
+
+   buffer->resources[1] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[1])
+      goto error;
+
+   if (num_planes == 2)
+      return &buffer->base;
+
+   templ.format = resource_formats[2];
+   buffer->resources[2] = pipe->screen->resource_create(pipe->screen, &templ);
+   if (!buffer->resources[2])
+      goto error;
+
+   return &buffer->base;
+
+error:
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      pipe_resource_reference(&buffer->resources[i], NULL);
+   FREE(buffer);
+
+   return NULL;
 }
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
index adba6c56e88..1acc9f49622 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.h
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -28,7 +28,8 @@
 #ifndef vl_ycbcr_buffer_h
 #define vl_ycbcr_buffer_h
 
-#include <pipe/p_state.h>
+#include <pipe/p_context.h>
+#include <pipe/p_video_context.h>
 
 #define VL_MAX_PLANES 3
 
@@ -36,49 +37,26 @@
  * implementation of a planar ycbcr buffer
  */
 
-/* resources of a buffer */
-typedef struct pipe_resource *vl_resources[VL_MAX_PLANES];
-
-/* sampler views of a buffer */
-typedef struct pipe_sampler_view *vl_sampler_views[VL_MAX_PLANES];
-
-/* surfaces of a buffer */
-typedef struct pipe_surface *vl_surfaces[VL_MAX_PLANES];
-
 /* planar buffer for vl data upload and manipulation */
 struct vl_video_buffer
 {
-   struct pipe_context *pipe;
-   unsigned            num_planes;
-   vl_resources        resources;
-   vl_sampler_views    sampler_views;
-   vl_surfaces         surfaces;
+   struct pipe_video_buffer base;
+   struct pipe_context      *pipe;
+   unsigned                 num_planes;
+   struct pipe_resource     *resources[VL_MAX_PLANES];
+   struct pipe_sampler_view *sampler_views[VL_MAX_PLANES];
+   struct pipe_surface      *surfaces[VL_MAX_PLANES];
 };
 
 /**
  * initialize a buffer, creating its resources
  */
-bool vl_video_buffer_init(struct vl_video_buffer *buffer,
-                          struct pipe_context *pipe,
-                          unsigned width, unsigned height, unsigned depth,
-                          enum pipe_video_chroma_format chroma_format,
-                          unsigned num_planes,
-                          const enum pipe_format resource_formats[VL_MAX_PLANES],
-                          unsigned usage);
-
-/**
- * create default sampler views for the buffer on demand
- */
-vl_sampler_views *vl_video_buffer_sampler_views(struct vl_video_buffer *buffer);
-
-/**
- * create default surfaces for the buffer on demand
- */
-vl_surfaces *vl_video_buffer_surfaces(struct vl_video_buffer *buffer);
-
-/**
- * cleanup the buffer destroying all its resources
- */
-void vl_video_buffer_cleanup(struct vl_video_buffer *buffer);
-
+struct pipe_video_buffer *
+vl_video_buffer_init(struct pipe_video_context *context,
+                     struct pipe_context *pipe,
+                     unsigned width, unsigned height, unsigned depth,
+                     enum pipe_video_chroma_format chroma_format,
+                     unsigned num_planes,
+                     const enum pipe_format resource_formats[VL_MAX_PLANES],
+                     unsigned usage);
 #endif
diff --git a/src/gallium/drivers/nv40/nv40_video_context.c b/src/gallium/drivers/nv40/nv40_video_context.c
index 34bb7cdbdda..cd231e434a5 100644
--- a/src/gallium/drivers/nv40/nv40_video_context.c
+++ b/src/gallium/drivers/nv40/nv40_video_context.c
@@ -27,12 +27,10 @@
 
 #include "nv40_video_context.h"
 #include "util/u_video.h"
-#include <vl/vl_mpeg12_context.h>
+#include <vl/vl_context.h>
 
 struct pipe_video_context *
-nv40_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
-                  enum pipe_video_chroma_format chroma_format,
-                  unsigned width, unsigned height, void *priv)
+nv40_video_create(struct pipe_screen *screen, void *priv)
 {
    struct pipe_context *pipe;
 
@@ -42,13 +40,5 @@ nv40_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
    if (!pipe)
       return NULL;
 
-   switch (u_reduce_video_profile(profile)) {
-      case PIPE_VIDEO_CODEC_MPEG12:
-         return vl_create_mpeg12_context(pipe, profile,
-                                         chroma_format,
-                                         width, height,
-                                         true);
-      default:
-         return NULL;
-   }
+   return vl_create_context(pipe, true);
 }
diff --git a/src/gallium/drivers/nv40/nv40_video_context.h b/src/gallium/drivers/nv40/nv40_video_context.h
index 64196caca72..d34ab7ab130 100644
--- a/src/gallium/drivers/nv40/nv40_video_context.h
+++ b/src/gallium/drivers/nv40/nv40_video_context.h
@@ -31,8 +31,6 @@
 #include <pipe/p_video_context.h>
 
 struct pipe_video_context *
-nv40_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
-                  enum pipe_video_chroma_format chroma_format,
-                  unsigned width, unsigned height, void *priv);
+nv40_video_create(struct pipe_screen *screen, void *priv);
 
 #endif
diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.c b/src/gallium/drivers/nvfx/nvfx_video_context.c
index 01a84f2ebc6..9c9ccfe317d 100644
--- a/src/gallium/drivers/nvfx/nvfx_video_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_video_context.c
@@ -27,12 +27,10 @@
 
 #include "nvfx_video_context.h"
 #include "util/u_video.h"
-#include <vl/vl_mpeg12_context.h>
+#include <vl/vl_context.h>
 
 struct pipe_video_context *
-nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
-                  enum pipe_video_chroma_format chroma_format,
-                  unsigned width, unsigned height, void *priv)
+nvfx_video_create(struct pipe_screen *screen, void *priv)
 {
    struct pipe_context *pipe;
 
@@ -42,13 +40,5 @@ nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
    if (!pipe)
       return NULL;
 
-   switch (u_reduce_video_profile(profile)) {
-      case PIPE_VIDEO_CODEC_MPEG12:
-         return vl_create_mpeg12_context(pipe, profile,
-                                         chroma_format,
-                                         width, height,
-                                         true);
-      default:
-         return NULL;
-   }
+   return vl_create_context(pipe, true);
 }
diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.h b/src/gallium/drivers/nvfx/nvfx_video_context.h
index 6619427cc29..b220b9f82dc 100644
--- a/src/gallium/drivers/nvfx/nvfx_video_context.h
+++ b/src/gallium/drivers/nvfx/nvfx_video_context.h
@@ -31,8 +31,6 @@
 #include <pipe/p_video_context.h>
 
 struct pipe_video_context *
-nvfx_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
-                  enum pipe_video_chroma_format chroma_format,
-                  unsigned width, unsigned height, void *priv);
+nvfx_video_create(struct pipe_screen *screen, void *priv);
 
 #endif
diff --git a/src/gallium/drivers/r600/r600_video_context.c b/src/gallium/drivers/r600/r600_video_context.c
index c1b0c098245..a0ab3475fde 100644
--- a/src/gallium/drivers/r600/r600_video_context.c
+++ b/src/gallium/drivers/r600/r600_video_context.c
@@ -27,12 +27,10 @@
 
 #include "r600_video_context.h"
 #include "util/u_video.h"
-#include <vl/vl_mpeg12_context.h>
+#include <vl/vl_context.h>
 
 struct pipe_video_context *
-r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
-                  enum pipe_video_chroma_format chroma_format,
-                  unsigned width, unsigned height, void *priv)
+r600_video_create(struct pipe_screen *screen, void *priv)
 {
    struct pipe_context *pipe;
 
@@ -42,13 +40,5 @@ r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
    if (!pipe)
       return NULL;
 
-   switch (u_reduce_video_profile(profile)) {
-      case PIPE_VIDEO_CODEC_MPEG12:
-         return vl_create_mpeg12_context(pipe, profile,
-                                         chroma_format,
-                                         width, height,
-                                         false);
-      default:
-         return NULL;
-   }
+   return vl_create_context(pipe, false);
 }
diff --git a/src/gallium/drivers/r600/r600_video_context.h b/src/gallium/drivers/r600/r600_video_context.h
index bda33a00d44..f579980bd36 100644
--- a/src/gallium/drivers/r600/r600_video_context.h
+++ b/src/gallium/drivers/r600/r600_video_context.h
@@ -4,8 +4,6 @@
 #include <pipe/p_video_context.h>
 
 struct pipe_video_context *
-r600_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
-                  enum pipe_video_chroma_format chroma_format,
-                  unsigned width, unsigned height, void *priv);
+r600_video_create(struct pipe_screen *screen, void *priv);
 
 #endif
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index d5936641ba5..bb72cf63bc5 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -33,7 +33,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 #include "draw/draw_context.h"
-#include "vl/vl_mpeg12_context.h"
+#include "vl/vl_context.h"
 
 #include "state_tracker/sw_winsys.h"
 #include "tgsi/tgsi_exec.h"
@@ -288,29 +288,18 @@ softpipe_flush_frontbuffer(struct pipe_screen *_screen,
 }
 
 static struct pipe_video_context *
-sp_video_create(struct pipe_screen *screen, enum pipe_video_profile profile,
-                enum pipe_video_chroma_format chroma_format,
-                unsigned width, unsigned height, void *priv)
+sp_video_create(struct pipe_screen *screen, void *priv)
 {
    struct pipe_context *pipe;
 
    assert(screen);
-   assert(width && height);
 
    pipe = screen->context_create(screen, NULL);
    if (!pipe)
       return NULL;
 
    /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture with softpipe */
-   switch (u_reduce_video_profile(profile)) {
-      case PIPE_VIDEO_CODEC_MPEG12:
-         return vl_create_mpeg12_context(pipe, profile,
-                                         chroma_format,
-                                         width, height,
-                                         true);
-      default:
-         return NULL;
-   }
+   return vl_create_context(pipe, true);
 }
 
 /**
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 0d1b1094550..4f95fa1b2e1 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,12 +22,12 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 /**
  * @file
- * 
+ *
  * Screen, Adapter or GPU
  *
  * These are driver functions/facilities that are context independent.
@@ -94,11 +94,7 @@ struct pipe_screen {
 
    struct pipe_context * (*context_create)( struct pipe_screen *, void *priv );
 
-   struct pipe_video_context * (*video_context_create)( struct pipe_screen *screen,
-                                                        enum pipe_video_profile profile,
-                                                        enum pipe_video_chroma_format chroma_format,
-                                                        unsigned width, unsigned height, void *priv );
-
+   struct pipe_video_context * (*video_context_create)( struct pipe_screen *screen, void *priv );
 
    /**
     * Check if the given pipe_format is supported as a texture or
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index dec524d5290..33c64baf7c4 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -35,12 +35,10 @@ extern "C" {
 #include <pipe/p_video_state.h>
 
 struct pipe_screen;
-struct pipe_buffer;
 struct pipe_surface;
 struct pipe_macroblock;
 struct pipe_picture_desc;
 struct pipe_fence_handle;
-struct pipe_video_buffer;
 
 /**
  * Gallium video rendering context
@@ -48,36 +46,33 @@ struct pipe_video_buffer;
 struct pipe_video_context
 {
    struct pipe_screen *screen;
-   enum pipe_video_profile profile;
-   enum pipe_video_chroma_format chroma_format;
-   unsigned width;
-   unsigned height;
 
    void *priv; /**< context private data (for DRI for example) */
 
+   /**
+    * destroy context, all objects created from this context
+    * (buffers, decoders, compositors etc...) must be freed before calling this
+    */
+   void (*destroy)(struct pipe_video_context *context);
+
    /**
     * Query an integer-valued capability/parameter/limit
     * \param param  one of PIPE_CAP_x
     */
-   int (*get_param)(struct pipe_video_context *vpipe, int param);
+   int (*get_param)(struct pipe_video_context *context, int param);
 
    /**
     * Check if the given pipe_format is supported as a texture or
     * drawing surface.
     */
-   boolean (*is_format_supported)(struct pipe_video_context *vpipe,
+   boolean (*is_format_supported)(struct pipe_video_context *context,
                                   enum pipe_format format,
                                   unsigned usage);
 
-   /**
-    * destroy context, all buffers must be freed before calling this
-    */
-   void (*destroy)(struct pipe_video_context *vpipe);
-
    /**
     * create a surface of a texture
     */
-   struct pipe_surface *(*create_surface)(struct pipe_video_context *vpipe,
+   struct pipe_surface *(*create_surface)(struct pipe_video_context *context,
                                           struct pipe_resource *resource,
                                           const struct pipe_surface *templ);
 
@@ -89,14 +84,14 @@ struct pipe_video_context
    /**
     * create a sampler view of a texture, for subpictures for example
     */
-   struct pipe_sampler_view *(*create_sampler_view)(struct pipe_video_context *vpipe,
+   struct pipe_sampler_view *(*create_sampler_view)(struct pipe_video_context *context,
                                                     struct pipe_resource *resource,
                                                     const struct pipe_sampler_view *templ);
 
    /**
     * upload image data to a sampler
     */
-   void (*upload_sampler)(struct pipe_video_context *vpipe,
+   void (*upload_sampler)(struct pipe_video_context *context,
                           struct pipe_sampler_view *dst,
                           const struct pipe_box *dst_box,
                           const void *src, unsigned src_stride,
@@ -105,74 +100,145 @@ struct pipe_video_context
    /**
     * clear a sampler with a specific rgba color
     */
-   void (*clear_sampler)(struct pipe_video_context *vpipe,
+   void (*clear_sampler)(struct pipe_video_context *context,
                          struct pipe_sampler_view *dst,
                          const struct pipe_box *dst_box,
                          const float *rgba);
 
+   /*}@*/
+
+   /**
+    * create a decoder for a specific video profile
+    */
+   struct pipe_video_decoder *(*create_decoder)(struct pipe_video_context *context,
+                                                enum pipe_video_profile profile,
+                                                enum pipe_video_chroma_format chroma_format,
+                                                unsigned width, unsigned height);
+
    /**
     * Creates a buffer as decoding target
     */
-   struct pipe_video_buffer *(*create_buffer)(struct pipe_video_context *vpipe);
+   struct pipe_video_buffer *(*create_buffer)(struct pipe_video_context *context,
+                                              enum pipe_format buffer_format,
+                                              enum pipe_video_chroma_format chroma_format,
+                                              unsigned width, unsigned height);
 
    /**
     * Creates a video compositor
     */
-   struct pipe_video_compositor *(*create_compositor)(struct pipe_video_context *vpipe);
+   struct pipe_video_compositor *(*create_compositor)(struct pipe_video_context *context);
+};
+
+/**
+ * decoder for a specific video codec
+ */
+struct pipe_video_decoder
+{
+   struct pipe_video_context *context;
+
+   enum pipe_video_profile profile;
+   enum pipe_video_chroma_format chroma_format;
+   unsigned width;
+   unsigned height;
 
    /**
-    * Picture decoding and displaying
+    * destroy this video decoder
     */
+   void (*destroy)(struct pipe_video_decoder *decoder);
 
-#if 0
-   void (*decode_bitstream)(struct pipe_video_context *vpipe,
-                            unsigned num_bufs,
-                            struct pipe_buffer **bitstream_buf);
-#endif
+   /**
+    * Creates a buffer as decoding input
+    */
+   struct pipe_video_decode_buffer *(*create_buffer)(struct pipe_video_decoder *decoder);
+
+   /**
+    * flush decoder buffer to video hardware
+    */
+   void (*flush_buffer)(struct pipe_video_decode_buffer *decbuf,
+                        struct pipe_video_buffer *ref_frames[2],
+                        struct pipe_video_buffer *dst,
+                        struct pipe_fence_handle **fence);
+
+   /**
+    * clear the decoder buffer's todo list
+    */
+   void (*clear_buffer)(struct pipe_video_decode_buffer *decbuf);
 
 };
 
-struct pipe_video_buffer
+/**
+ * input buffer for a decoder
+ */
+struct pipe_video_decode_buffer
 {
-   struct pipe_video_context* context;
+   struct pipe_video_decoder *decoder;
 
    /**
-    * destroy this video buffer
+    * destroy this decode buffer
     */
-   void (*destroy)(struct pipe_video_buffer *buffer);
+   void (*destroy)(struct pipe_video_decode_buffer *decbuf);
 
    /**
-    * map the buffer into memory before calling add_macroblocks
+    * map the input buffer into memory before starting decoding
     */
-   void (*map)(struct pipe_video_buffer *buffer);
+   void (*map)(struct pipe_video_decode_buffer *decbuf);
 
+#if 0
    /**
-    * add macroblocks to buffer for decoding
+    * decode a bitstream
     */
-   void (*add_macroblocks)(struct pipe_video_buffer *buffer,
+   void (*decode_bitstream)(struct pipe_video_decode_buffer *decbuf,
+                            unsigned num_bufs,
+                            struct pipe_buffer **bitstream_buf);
+#endif
+
+   /**
+    * add macroblocks to decoder buffer
+    */
+   void (*add_macroblocks)(struct pipe_video_decode_buffer *decbuf,
                            unsigned num_macroblocks,
                            struct pipe_macroblock *macroblocks);
 
    /**
-    * unmap buffer before flushing
+    * unmap decoder buffer before flushing
     */
-   void (*unmap)(struct pipe_video_buffer *buffer);
+   void (*unmap)(struct pipe_video_decode_buffer *decbuf);
+};
+
+/**
+ * output for decoding / input for displaying
+ */
+struct pipe_video_buffer
+{
+   struct pipe_video_context *context;
+
+   enum pipe_format buffer_format;
+   enum pipe_video_chroma_format chroma_format;
+   unsigned width;
+   unsigned height;
 
    /**
-    * flush buffer to video hardware
+    * destroy this video buffer
     */
-   void (*flush)(struct pipe_video_buffer *buffer,
-                 struct pipe_video_buffer *ref_frames[2],
-                 struct pipe_fence_handle **fence);
+   void (*destroy)(struct pipe_video_buffer *buffer);
 
+   /**
+    * get an individual sampler view for each plane
+    */
+   struct pipe_sampler_view **(*get_sampler_views)(struct pipe_video_buffer *buffer);
 
-   void (*get_sampler_views)(struct pipe_video_buffer *buffer,
-                             struct pipe_sampler_view *sampler_views[3]);
+   /**
+    * get an individual surface for each plane
+    */
+   struct pipe_surface **(*get_surfaces)(struct pipe_video_buffer *buffer);
 };
 
+/**
+ * composing and displaying of image data
+ */
 struct pipe_video_compositor
 {
-   struct pipe_video_context* context;
+   struct pipe_video_context *context;
 
    /**
     * destroy this compositor
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index bdcba72d7cf..2690f8046a8 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -231,9 +231,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       return BadAlloc;
    }
 
-   vctx = vl_video_create(vscreen, ProfileToPipe(mc_type),
-                          FormatToPipe(chroma_format), width, height);
-
+   vctx = vl_video_create(vscreen);
    if (!vctx) {
       XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL context.\n");
       vl_screen_destroy(vscreen);
@@ -241,9 +239,23 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       return BadAlloc;
    }
 
+   context_priv->decoder = vctx->vpipe->create_decoder(vctx->vpipe,
+                                                       ProfileToPipe(mc_type),
+                                                       FormatToPipe(chroma_format),
+                                                       width, height);
+
+   if (!context_priv->decoder) {
+      XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL decoder.\n");
+      vl_video_destroy(vctx);
+      vl_screen_destroy(vscreen);
+      FREE(context_priv);
+      return BadAlloc;
+   }
+
    context_priv->compositor = vctx->vpipe->create_compositor(vctx->vpipe);
    if (!context_priv->compositor) {
       XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL compositor.\n");
+      context_priv->decoder->destroy(context_priv->decoder);
       vl_video_destroy(vctx);
       vl_screen_destroy(vscreen);
       FREE(context_priv);
@@ -295,6 +307,7 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
    context_priv = context->privData;
    vctx = context_priv->vctx;
    vscreen = vctx->vscreen;
+   context_priv->decoder->destroy(context_priv->decoder);
    context_priv->compositor->destroy(context_priv->compositor);
    vl_video_destroy(vctx);
    vl_screen_destroy(vscreen);
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index efbebaa4ccf..ba55d9ddb22 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -151,10 +151,13 @@ static void
 unmap_and_flush_surface(XvMCSurfacePrivate *surface)
 {
    struct pipe_video_buffer *ref_frames[2];
+   XvMCContextPrivate *context_priv;
    unsigned i;
 
    assert(surface);
 
+   context_priv = surface->context->privData;
+
    for ( i = 0; i < 2; ++i ) {
       if (surface->ref_surfaces[i]) {
          XvMCSurfacePrivate *ref = surface->ref_surfaces[i]->privData;
@@ -163,17 +166,18 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
 
          unmap_and_flush_surface(ref);
          surface->ref_surfaces[i] = NULL;
-         ref_frames[i] = ref->pipe_buffer;
+         ref_frames[i] = ref->video_buffer;
       } else {
          ref_frames[i] = NULL;
       }
    }
 
    if (surface->mapped) {
-      surface->pipe_buffer->unmap(surface->pipe_buffer);
-      surface->pipe_buffer->flush(surface->pipe_buffer,
-                                  ref_frames,
-                                  &surface->flush_fence);
+      surface->decode_buffer->unmap(surface->decode_buffer);
+      context_priv->decoder->flush_buffer(surface->decode_buffer,
+                                          ref_frames,
+                                          surface->video_buffer,
+                                          &surface->flush_fence);
       surface->mapped = 0;
    }
 }
@@ -201,7 +205,11 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    if (!surface_priv)
       return BadAlloc;
 
-   surface_priv->pipe_buffer = vpipe->create_buffer(vpipe);
+   surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder);
+   surface_priv->video_buffer = vpipe->create_buffer(vpipe, PIPE_FORMAT_YV12, //TODO
+                                                     context_priv->decoder->chroma_format,
+                                                     context_priv->decoder->width,
+                                                     context_priv->decoder->height);
    surface_priv->context = context;
 
    surface->surface_id = XAllocID(dpy);
@@ -226,7 +234,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
 )
 {
    struct pipe_video_context *vpipe;
-   struct pipe_video_buffer *t_buffer;
+   struct pipe_video_decode_buffer *t_buffer;
    XvMCContextPrivate *context_priv;
    XvMCSurfacePrivate *target_surface_priv;
    XvMCSurfacePrivate *past_surface_priv;
@@ -274,7 +282,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    context_priv = context->privData;
    vpipe = context_priv->vctx->vpipe;
 
-   t_buffer = target_surface_priv->pipe_buffer;
+   t_buffer = target_surface_priv->decode_buffer;
 
    // enshure that all reference frames are flushed
    // not really nessasary, but speeds ups rendering
@@ -395,7 +403,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    unmap_and_flush_surface(surface_priv);
 
    compositor->clear_layers(compositor);
-   compositor->set_buffer_layer(compositor, 0, surface_priv->pipe_buffer, &src_rect, NULL);
+   compositor->set_buffer_layer(compositor, 0, surface_priv->video_buffer, &src_rect, NULL);
 
    if (subpicture_priv) {
       struct pipe_video_rect src_rect = {surface_priv->subx, surface_priv->suby, surface_priv->subw, surface_priv->subh};
@@ -471,7 +479,8 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
       return XvMCBadSurface;
 
    surface_priv = surface->privData;
-   surface_priv->pipe_buffer->destroy(surface_priv->pipe_buffer);
+   surface_priv->decode_buffer->destroy(surface_priv->decode_buffer);
+   surface_priv->video_buffer->destroy(surface_priv->video_buffer);
    FREE(surface_priv);
    surface->privData = NULL;
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 29518b36dbf..9a5338c2923 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -42,6 +42,7 @@ struct pipe_fence_handle;
 typedef struct
 {
    struct vl_context *vctx;
+   struct pipe_video_decoder *decoder;
    struct pipe_video_compositor *compositor;
 
    unsigned short subpicture_max_width;
@@ -50,7 +51,9 @@ typedef struct
 
 typedef struct
 {
-   struct pipe_video_buffer *pipe_buffer;
+   struct pipe_video_decode_buffer *decode_buffer;
+   struct pipe_video_buffer *video_buffer;
+
    bool mapped; // are we still mapped to memory?
 
    XvMCSurface *ref_surfaces[2];
diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index 8588ddd17cb..ffb94de4a7b 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -232,10 +232,7 @@ void vl_screen_destroy(struct vl_screen *vscreen)
 }
 
 struct vl_context*
-vl_video_create(struct vl_screen *vscreen,
-                enum pipe_video_profile profile,
-                enum pipe_video_chroma_format chroma_format,
-                unsigned width, unsigned height)
+vl_video_create(struct vl_screen *vscreen)
 {
    struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vscreen;
    struct vl_dri_context *vl_dri_ctx;
@@ -251,10 +248,7 @@ vl_video_create(struct vl_screen *vscreen,
       goto no_vpipe;
    }
 
-   vl_dri_ctx->base.vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen,
-                                                                   profile, chroma_format,
-                                                                   width, height,
-                                                                   vl_dri_ctx);
+   vl_dri_ctx->base.vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen, vl_dri_ctx);
 
    if (!vl_dri_ctx->base.vpipe)
       goto no_vpipe;
diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h
index 58f548849f6..152a4a62292 100644
--- a/src/gallium/winsys/g3dvl/vl_winsys.h
+++ b/src/gallium/winsys/g3dvl/vl_winsys.h
@@ -53,10 +53,7 @@ vl_screen_create(Display *display, int screen);
 void vl_screen_destroy(struct vl_screen *vscreen);
 
 struct vl_context*
-vl_video_create(struct vl_screen *vscreen,
-                enum pipe_video_profile profile,
-                enum pipe_video_chroma_format chroma_format,
-                unsigned width, unsigned height);
+vl_video_create(struct vl_screen *vscreen);
 
 void vl_video_destroy(struct vl_context *vctx);
 
-- 
cgit v1.2.3


From 05a2247a945db21ec5fdb39f6270c5cb2ab6bb6f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 6 Apr 2011 20:36:31 +0200
Subject: [g3dvl] set buffer_format correctly

---
 src/gallium/auxiliary/vl/vl_context.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index e352475cb8d..5a55f3b9607 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -208,6 +208,7 @@ vl_context_create_buffer(struct pipe_video_context *context,
    };
 
    struct vl_context *ctx = (struct vl_context*)context;
+   struct pipe_video_buffer *result;
    unsigned buffer_width, buffer_height;
 
    assert(context);
@@ -217,11 +218,15 @@ vl_context_create_buffer(struct pipe_video_context *context,
    buffer_width = ctx->pot_buffers ? util_next_power_of_two(width) : width;
    buffer_height = ctx->pot_buffers ? util_next_power_of_two(height) : height;
 
-   return vl_video_buffer_init(context, ctx->pipe,
-                               buffer_width, buffer_height, 1,
-                               chroma_format, 3,
-                               resource_formats,
-                               PIPE_USAGE_STATIC);
+   result = vl_video_buffer_init(context, ctx->pipe,
+                                 buffer_width, buffer_height, 1,
+                                 chroma_format, 3,
+                                 resource_formats,
+                                 PIPE_USAGE_STATIC);
+   if (result) // TODO move format handling into vl_video_buffer
+      result->buffer_format = buffer_format;
+
+   return result;
 }
 
 static struct pipe_video_compositor *
-- 
cgit v1.2.3


From 2c21d28e8315ff65cb6f47fda46cbd65d67fb4e7 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 6 Apr 2011 20:38:16 +0200
Subject: vdpau: implement vlVdpVideoSurfacePutBitsYCbCr

---
 src/gallium/state_trackers/vdpau/device.c        |  8 +++
 src/gallium/state_trackers/vdpau/surface.c       | 82 ++++++++++++++----------
 src/gallium/state_trackers/vdpau/vdpau_private.h |  9 +--
 3 files changed, 57 insertions(+), 42 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index d3314d0d4bf..7906757ec1b 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -61,6 +61,12 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
       goto no_vscreen;
    }
 
+   dev->context = vl_video_create(dev->vscreen);
+   if (!dev->context) {
+      ret = VDP_STATUS_RESOURCES;
+      goto no_context;
+   }
+
    *device = vlAddDataHTAB(dev);
    if (*device == 0) {
       ret = VDP_STATUS_ERROR;
@@ -74,6 +80,8 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
 
 no_handle:
    /* Destroy vscreen */
+no_context:
+   vl_screen_destroy(dev->vscreen);
 no_vscreen:
    FREE(dev);
 no_dev:
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index aa8f90d576a..496f647a8d0 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -1,6 +1,7 @@
 /**************************************************************************
  *
  * Copyright 2010 Thomas Balling Sørensen.
+ * Copyright 2011 Christian König.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,22 +26,26 @@
  *
  **************************************************************************/
 
-#include "vdpau_private.h"
-#include <pipe/p_screen.h>
+#include <assert.h>
+
+#include <pipe/p_video_context.h>
 #include <pipe/p_state.h>
+
 #include <util/u_memory.h>
-#include <util/u_format.h>
+#include <util/u_debug.h>
+
+#include "vdpau_private.h"
 
 VdpStatus
 vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
                         uint32_t width, uint32_t height,
                         VdpVideoSurface *surface)
 {
-   printf("[VDPAU] Creating a surface\n");
-
    vlVdpSurface *p_surf;
    VdpStatus ret;
 
+   _debug_printf("[VDPAU] Creating a surface\n");
+
    if (!(width && height)) {
       ret = VDP_STATUS_INVALID_SIZE;
       goto inv_size;
@@ -63,10 +68,11 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
       goto inv_device;
    }
 
-   p_surf->chroma_format = ChromaToPipe(chroma_type);
    p_surf->device = dev;
-   p_surf->width = width;
-   p_surf->height = height;
+   p_surf->video_buffer = dev->context->vpipe->create_buffer(dev->context->vpipe,
+                                                             PIPE_FORMAT_YV12, // most commonly used
+                                                             ChromaToPipe(chroma_type),
+                                                             width, height); // NOTE(review): result is not NULL-checked here; the no_handle path dereferences it
 
    *surface = vlAddDataHTAB(p_surf);
    if (*surface == 0) {
@@ -77,12 +83,12 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
    return VDP_STATUS_OK;
 
 no_handle:
-   //FREE(p_surf->psurface);
+   p_surf->video_buffer->destroy(p_surf->video_buffer);
+
 inv_device:
-no_surf:
    FREE(p_surf);
+
 no_res:
-   // vlDestroyHTAB(); XXX: Do not destroy this tab, I think.
 no_htab:
 inv_size:
    return ret;
@@ -97,12 +103,9 @@ vlVdpVideoSurfaceDestroy(VdpVideoSurface surface)
    if (!p_surf)
       return VDP_STATUS_INVALID_HANDLE;
 
-   //if (p_surf->psurface) {
-   //   if (p_surf->psurface->texture) {
-   //      if (p_surf->psurface->texture->screen)
-   //         p_surf->psurface->context->surface_destroy(p_surf->psurface->context, p_surf->psurface);
-   //   }
-   //}
+   if (p_surf->video_buffer)
+      p_surf->video_buffer->destroy(p_surf->video_buffer);
+
    FREE(p_surf);
    return VDP_STATUS_OK;
 }
@@ -119,12 +122,9 @@ vlVdpVideoSurfaceGetParameters(VdpVideoSurface surface,
    if (!p_surf)
       return VDP_STATUS_INVALID_HANDLE;
 
-   if (!(p_surf->chroma_format > 0 && p_surf->chroma_format < 3))
-      return VDP_STATUS_INVALID_CHROMA_TYPE;
-
-   *width = p_surf->width;
-   *height = p_surf->height;
-   *chroma_type = PipeToChroma(p_surf->chroma_format);
+   *width = p_surf->video_buffer->width;
+   *height = p_surf->video_buffer->height;
+   *chroma_type = PipeToChroma(p_surf->video_buffer->chroma_format);
 
    return VDP_STATUS_OK;
 }
@@ -145,7 +145,8 @@ vlVdpVideoSurfaceGetBitsYCbCr(VdpVideoSurface surface,
    //if (!p_surf->psurface)
    //   return VDP_STATUS_RESOURCES;
 
-   return VDP_STATUS_OK;
+   //return VDP_STATUS_OK;
+   return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
 VdpStatus
@@ -154,9 +155,10 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
                               void const *const *source_data,
                               uint32_t const *source_pitches)
 {
-   uint32_t size_surface_bytes;
-   const struct util_format_description *format_desc;
    enum pipe_format pformat = FormatToPipe(source_ycbcr_format);
+   struct pipe_video_context *context;
+   struct pipe_sampler_view **sampler_views;
+   unsigned i;
 
    if (!vlCreateHTAB())
       return VDP_STATUS_RESOURCES;
@@ -165,14 +167,24 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
    if (!p_surf)
       return VDP_STATUS_INVALID_HANDLE;
 
-   //size_surface_bytes = ( source_pitches[0] * p_surf->height util_format_get_blockheight(pformat) );
-   /*util_format_translate(enum pipe_format dst_format,
-   void *dst, unsigned dst_stride,
-   unsigned dst_x, unsigned dst_y,
-   enum pipe_format src_format,
-   const void *src, unsigned src_stride,
-   unsigned src_x, unsigned src_y,
-   unsigned width, unsigned height);*/
+   context = p_surf->device->context->vpipe;
+   if (!context)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   if (p_surf->video_buffer == NULL || pformat != p_surf->video_buffer->buffer_format) {
+      assert(0); // TODO Recreate resource
+      return VDP_STATUS_NO_IMPLEMENTATION;
+   }
 
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   sampler_views = p_surf->video_buffer->get_sampler_views(p_surf->video_buffer);
+   if (!sampler_views)
+      return VDP_STATUS_RESOURCES;
+
+   for (i = 0; i < 3; ++i) { //TODO put nr of planes into util format
+      struct pipe_sampler_view *sv = sampler_views[i];
+      struct pipe_box dst_box = { 0, 0, 0, sv->texture->width0, sv->texture->height0, 1 }; // x, y, z, width, height, depth: whole plane
+      context->upload_sampler(context, sv, &dst_box, source_data[i], source_pitches[i], 0, 0);
+   }
+
+   return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index c5503ea554e..d326784c951 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -171,6 +171,7 @@ typedef struct
    Display *display;
    int screen;
    struct vl_screen *vscreen;
+   struct vl_context *context;
 } vlVdpDevice;
 
 typedef struct
@@ -193,13 +194,7 @@ typedef struct
 typedef struct
 {
    vlVdpDevice *device;
-   enum pipe_video_chroma_format chroma_format;
-   uint32_t width;
-   uint32_t height;
-
-   //uint32_t pitch;
-   //struct pipe_surface *psurface;
-   //uint8_t *data;
+   struct pipe_video_buffer *video_buffer;
 } vlVdpSurface;
 
 typedef struct
-- 
cgit v1.2.3


From 9d2e630cd02362bfa8f090640a55cf2dea9d64b3 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 7 Apr 2011 19:24:22 +0200
Subject: [g3dvl] move mapping/unmapping and uploading of blocks out of idct
 code

---
 src/gallium/auxiliary/vl/vl_idct.c           | 54 -------------------
 src/gallium/auxiliary/vl/vl_idct.h           | 12 -----
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 77 +++++++++++++++++++++++++---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |  3 ++
 4 files changed, 72 insertions(+), 74 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index c92659bc5b0..a7b8a18dec3 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -702,60 +702,6 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    cleanup_intermediate(idct, buffer);
 }
 
-void
-vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
-{
-   struct pipe_resource *tex;
-
-   assert(idct && buffer);
-
-   tex = buffer->sampler_views.individual.source->texture;
-
-   struct pipe_box rect =
-   {
-      0, 0, 0,
-      tex->width0,
-      tex->height0,
-      1
-   };
-
-   buffer->tex_transfer = idct->pipe->get_transfer
-   (
-      idct->pipe, tex,
-      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &rect
-   );
-
-   buffer->texels = idct->pipe->transfer_map(idct->pipe, buffer->tex_transfer);
-}
-
-void
-vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block)
-{
-   unsigned tex_pitch;
-   short *texels;
-
-   unsigned i;
-
-   assert(buffer);
-   assert(block);
-
-   tex_pitch = buffer->tex_transfer->stride / sizeof(short);
-   texels = buffer->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
-
-   for (i = 0; i < BLOCK_HEIGHT; ++i)
-      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
-}
-
-void
-vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer)
-{
-   assert(idct && buffer);
-
-   idct->pipe->transfer_unmap(idct->pipe, buffer->tex_transfer);
-   idct->pipe->transfer_destroy(idct->pipe, buffer->tex_transfer);
-}
-
 void
 vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
 {
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index fedebd3ff21..cd62cde449b 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -66,9 +66,6 @@ struct vl_idct_buffer
          struct pipe_sampler_view *transpose, *intermediate;
       } individual;
    } sampler_views;
-
-   struct pipe_transfer *tex_transfer;
-   short *texels;
 };
 
 /* upload the idct matrix, which can be shared by all idct instances of a pipe */
@@ -90,15 +87,6 @@ bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
 /* cleanup a buffer of an idct instance */
 void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
-/* map a buffer for use with vl_idct_add_block */
-void vl_idct_map_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer);
-
-/* add an block of to be tranformed data a the given x and y coordinate */
-void vl_idct_add_block(struct vl_idct_buffer *buffer, unsigned x, unsigned y, short *block);
-
-/* unmaps the buffers before flushing */
-void vl_idct_unmap_buffers(struct vl_idct *idct, struct vl_idct_buffer *buffer);
-
 /* flush the buffer and start rendering, vertex buffers needs to be setup before calling this */
 void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_verts);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index c07b1bb369e..24f385681c2 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -50,6 +50,58 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {
         { { 0x01, 0x01 },  { 0x01, 0x01 } }
 };
 
+static void
+map_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
+{
+   struct pipe_sampler_view **sampler_views;
+   struct pipe_resource *tex;
+   unsigned i;
+
+   assert(ctx && buffer);
+
+   sampler_views = buffer->idct_source->get_sampler_views(buffer->idct_source);
+   assert(sampler_views);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      tex = sampler_views[i]->texture;
+
+      struct pipe_box rect =
+      {
+         0, 0, 0,
+         tex->width0,
+         tex->height0,
+         1
+      };
+
+      buffer->tex_transfer[i] = ctx->pipe->get_transfer
+      (
+         ctx->pipe, tex,
+         0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+         &rect
+      );
+
+      buffer->texels[i] = ctx->pipe->transfer_map(ctx->pipe, buffer->tex_transfer[i]);
+   }
+}
+
+static void
+upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane, unsigned x, unsigned y, short *block)
+{
+   unsigned tex_pitch;
+   short *texels;
+
+   unsigned i;
+
+   assert(buffer);
+   assert(block);
+
+   tex_pitch = buffer->tex_transfer[plane]->stride / sizeof(short);
+   texels = buffer->texels[plane] + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
+
+   for (i = 0; i < BLOCK_HEIGHT; ++i)
+      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
+}
+
 static void
 upload_buffer(struct vl_mpeg12_decoder *ctx,
               struct vl_mpeg12_buffer *buffer,
@@ -67,7 +119,7 @@ upload_buffer(struct vl_mpeg12_decoder *ctx,
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
          if (mb->cbp & (*ctx->empty_block_mask)[0][y][x]) {
-            vl_idct_add_block(&buffer->idct[0], mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
+            upload_block(buffer, 0, mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
             blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
          }
       }
@@ -78,12 +130,25 @@ upload_buffer(struct vl_mpeg12_decoder *ctx,
 
    for (tb = 1; tb < 3; ++tb) {
       if (mb->cbp & (*ctx->empty_block_mask)[tb][0][0]) {
-         vl_idct_add_block(&buffer->idct[tb], mb->mbx, mb->mby, blocks);
+         upload_block(buffer, tb, mb->mbx, mb->mby, blocks);
          blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
       }
    }
 }
 
+static void
+unmap_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
+{
+   unsigned i;
+
+   assert(ctx && buffer);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      ctx->pipe->transfer_unmap(ctx->pipe, buffer->tex_transfer[i]);
+      ctx->pipe->transfer_destroy(ctx->pipe, buffer->tex_transfer[i]);
+   }
+}
+
 static void
 vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
 {
@@ -115,9 +180,7 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
    assert(dec);
 
    vl_vb_map(&buf->vertex_stream, dec->pipe);
-   vl_idct_map_buffers(&dec->idct_y, &buf->idct[0]);
-   vl_idct_map_buffers(&dec->idct_c, &buf->idct[1]);
-   vl_idct_map_buffers(&dec->idct_c, &buf->idct[2]);
+   map_buffers(dec, buf);
 }
 
 static void
@@ -156,9 +219,7 @@ vl_mpeg12_buffer_unmap(struct pipe_video_decode_buffer *buffer)
    assert(dec);
 
    vl_vb_unmap(&buf->vertex_stream, dec->pipe);
-   vl_idct_unmap_buffers(&dec->idct_y, &buf->idct[0]);
-   vl_idct_unmap_buffers(&dec->idct_c, &buf->idct[1]);
-   vl_idct_unmap_buffers(&dec->idct_c, &buf->idct[2]);
+   unmap_buffers(dec, buf);
 }
 
 static void
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index f7dc2d5799a..69d649b179a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -76,6 +76,9 @@ struct vl_mpeg12_buffer
 
    struct vl_idct_buffer idct[VL_MAX_PLANES];
    struct vl_mpeg12_mc_buffer mc[VL_MAX_PLANES];
+
+   struct pipe_transfer *tex_transfer[VL_MAX_PLANES];
+   short *texels[VL_MAX_PLANES];
 };
 
 /* drivers can call this function in their pipe_video_context constructors and pass it
-- 
cgit v1.2.3


From fcdf50f74befad8d89eb3f9cdfd88b82d1daa98c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 7 Apr 2011 20:10:55 +0200
Subject: [g3dvl] add support for different decoding entry points

---
 src/gallium/auxiliary/vl/vl_context.c            |   5 +-
 src/gallium/auxiliary/vl/vl_idct.c               |   6 +-
 src/gallium/auxiliary/vl/vl_idct.h               |   2 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c     | 148 +++++++++++++----------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h     |   3 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  14 ++-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   3 +-
 src/gallium/include/pipe/p_defines.h             |   7 ++
 src/gallium/include/pipe/p_video_context.h       |   2 +
 src/gallium/state_trackers/xorg/xvmc/context.c   |   5 +-
 10 files changed, 113 insertions(+), 82 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index 5a55f3b9607..2bc027fedc5 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -173,6 +173,7 @@ error_map:
 static struct pipe_video_decoder *
 vl_context_create_decoder(struct pipe_video_context *context,
                           enum pipe_video_profile profile,
+                          enum pipe_video_entrypoint entrypoint,
                           enum pipe_video_chroma_format chroma_format,
                           unsigned width, unsigned height)
 {
@@ -187,8 +188,8 @@ vl_context_create_decoder(struct pipe_video_context *context,
 
    switch (u_reduce_video_profile(profile)) {
       case PIPE_VIDEO_CODEC_MPEG12:
-         return vl_create_mpeg12_decoder(context, ctx->pipe, profile, chroma_format,
-                                         buffer_width, buffer_height);
+         return vl_create_mpeg12_decoder(context, ctx->pipe, profile, entrypoint,
+                                         chroma_format, buffer_width, buffer_height);
       default:
          return NULL;
    }
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index a7b8a18dec3..dc4a9bbb8c9 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -38,8 +38,6 @@
 #include <tgsi/tgsi_ureg.h>
 #include "vl_types.h"
 
-#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
-
 #define NR_RENDER_TARGETS 4
 
 enum VS_OUTPUT
@@ -534,10 +532,8 @@ cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 }
 
 struct pipe_sampler_view *
-vl_idct_upload_matrix(struct pipe_context *pipe)
+vl_idct_upload_matrix(struct pipe_context *pipe, float scale)
 {
-   const float scale = sqrtf(SCALE_FACTOR_16_TO_9);
-
    struct pipe_resource tex_templ, *matrix;
    struct pipe_sampler_view sv_templ, *sv;
    struct pipe_transfer *buf_transfer;
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index cd62cde449b..5d3784ce6c0 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -69,7 +69,7 @@ struct vl_idct_buffer
 };
 
 /* upload the idct matrix, which can be shared by all idct instances of a pipe */
-struct pipe_sampler_view *vl_idct_upload_matrix(struct pipe_context *pipe);
+struct pipe_sampler_view *vl_idct_upload_matrix(struct pipe_context *pipe, float scale);
 
 /* init an idct instance */
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 24f385681c2..31163b9d08e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -25,17 +25,8 @@
  *
  **************************************************************************/
 
-//#include <pipe/p_shader_tokens.h>
-
-//#include "util/u_inlines.h"
-
-//#include <util/u_inlines.h>
-//#include <util/u_memory.h>
-//#include <util/u_keymap.h>
-
-//#include <util/u_video.h>
-//#include <util/u_surface.h>
-//#include <util/u_sampler.h>
+#include <math.h>
+#include <assert.h>
 
 #include <util/u_memory.h>
 #include <util/u_rect.h>
@@ -44,6 +35,8 @@
 #include "vl_mpeg12_decoder.h"
 #include "vl_defines.h"
 
+#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
+
 static const unsigned const_empty_block_mask_420[3][2][2] = {
         { { 0x20, 0x10 },  { 0x08, 0x04 } },
         { { 0x02, 0x02 },  { 0x02, 0x02 } },
@@ -59,7 +52,10 @@ map_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
 
    assert(ctx && buffer);
 
-   sampler_views = buffer->idct_source->get_sampler_views(buffer->idct_source);
+   if (ctx->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      sampler_views = buffer->idct_source->get_sampler_views(buffer->idct_source);
+   else
+      sampler_views = buffer->mc_source->get_sampler_views(buffer->mc_source);
    assert(sampler_views);
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
@@ -156,12 +152,14 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
    assert(buf && dec);
 
-   buf->idct_source->destroy(buf->idct_source);
-   buf->idct_2_mc->destroy(buf->idct_2_mc);
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      buf->idct_source->destroy(buf->idct_source);
+      vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
+      vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
+      vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
+   }
+   buf->mc_source->destroy(buf->mc_source);
    vl_vb_cleanup(&buf->vertex_stream);
-   vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
-   vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
-   vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
    vl_mpeg12_mc_cleanup_buffer(&buf->mc[0]);
    vl_mpeg12_mc_cleanup_buffer(&buf->mc[1]);
    vl_mpeg12_mc_cleanup_buffer(&buf->mc[2]);
@@ -238,8 +236,10 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa);
 
    vl_mpeg12_mc_renderer_cleanup(&dec->mc);
-   vl_idct_cleanup(&dec->idct_y);
-   vl_idct_cleanup(&dec->idct_c);
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      vl_idct_cleanup(&dec->idct_y);
+      vl_idct_cleanup(&dec->idct_c);
+   }
    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves[0]);
    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves[1]);
    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves[2]);
@@ -257,7 +257,7 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
       PIPE_FORMAT_R16G16B16A16_SNORM
    };
 
-   const enum pipe_format idct_2_mc_formats[3] = {
+   const enum pipe_format mc_source_formats[3] = {
       PIPE_FORMAT_R16_SNORM,
       PIPE_FORMAT_R16_SNORM,
       PIPE_FORMAT_R16_SNORM
@@ -291,43 +291,47 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    if (!buffer->vertex_bufs.individual.stream.buffer)
       goto error_vertex_stream;
 
-   buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                              dec->base.width / 4, dec->base.height, 1,
-                                              dec->base.chroma_format, 3,
-                                              idct_source_formats,
-                                              PIPE_USAGE_STREAM);
-   if (!buffer->idct_source)
-      goto error_idct_source;
-
-   buffer->idct_2_mc = vl_video_buffer_init(dec->base.context, dec->pipe,
+   buffer->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                             dec->base.width, dec->base.height, 1,
                                             dec->base.chroma_format, 3,
-                                            idct_2_mc_formats,
+                                            mc_source_formats,
                                             PIPE_USAGE_STATIC);
-   if (!buffer->idct_2_mc)
-      goto error_idct_2_mc;
 
-   idct_views = buffer->idct_source->get_sampler_views(buffer->idct_source);
-   if (!idct_views)
-      goto error_idct_views;
+   if (!buffer->mc_source)
+      goto error_mc_source;
 
-   idct_surfaces = buffer->idct_2_mc->get_surfaces(buffer->idct_2_mc);
-   if (!idct_surfaces)
-      goto error_idct_surfaces;
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                                 dec->base.width / 4, dec->base.height, 1,
+                                                 dec->base.chroma_format, 3,
+                                                 idct_source_formats,
+                                                 PIPE_USAGE_STREAM);
+      if (!buffer->idct_source)
+         goto error_idct_source;
 
-   if (!vl_idct_init_buffer(&dec->idct_y, &buffer->idct[0],
-                            idct_views[0], idct_surfaces[0]))
-      goto error_idct_y;
 
-   if (!vl_idct_init_buffer(&dec->idct_c, &buffer->idct[1],
-                            idct_views[1], idct_surfaces[1]))
-      goto error_idct_cb;
+      idct_views = buffer->idct_source->get_sampler_views(buffer->idct_source);
+      if (!idct_views)
+         goto error_idct_views;
+
+      idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
+      if (!idct_surfaces)
+         goto error_idct_surfaces;
 
-   if (!vl_idct_init_buffer(&dec->idct_c, &buffer->idct[2],
-                            idct_views[2], idct_surfaces[2]))
-      goto error_idct_cr;
+      if (!vl_idct_init_buffer(&dec->idct_y, &buffer->idct[0],
+                               idct_views[0], idct_surfaces[0]))
+         goto error_idct_y;
+
+      if (!vl_idct_init_buffer(&dec->idct_c, &buffer->idct[1],
+                               idct_views[1], idct_surfaces[1]))
+         goto error_idct_cb;
+
+      if (!vl_idct_init_buffer(&dec->idct_c, &buffer->idct[2],
+                               idct_views[2], idct_surfaces[2]))
+         goto error_idct_cr;
+   }
 
-   mc_views = buffer->idct_2_mc->get_sampler_views(buffer->idct_2_mc);
+   mc_views = buffer->mc_source->get_sampler_views(buffer->mc_source);
    if (!mc_views)
       goto error_mc_views;
 
@@ -350,23 +354,27 @@ error_mc_cb:
 
 error_mc_y:
 error_mc_views:
-   vl_idct_cleanup_buffer(&dec->idct_c, &buffer->idct[2]);
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      vl_idct_cleanup_buffer(&dec->idct_c, &buffer->idct[2]);
 
 error_idct_cr:
-   vl_idct_cleanup_buffer(&dec->idct_c, &buffer->idct[1]);
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      vl_idct_cleanup_buffer(&dec->idct_c, &buffer->idct[1]);
 
 error_idct_cb:
-   vl_idct_cleanup_buffer(&dec->idct_y, &buffer->idct[0]);
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      vl_idct_cleanup_buffer(&dec->idct_y, &buffer->idct[0]);
 
 error_idct_y:
 error_idct_surfaces:
 error_idct_views:
-   buffer->idct_2_mc->destroy(buffer->idct_2_mc);
-
-error_idct_2_mc:
-   buffer->idct_source->destroy(buffer->idct_source);
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      buffer->idct_source->destroy(buffer->idct_source);
 
 error_idct_source:
+   buffer->mc_source->destroy(buffer->mc_source);
+
+error_mc_source:
    vl_vb_cleanup(&buffer->vertex_stream);
 
 error_vertex_stream:
@@ -408,7 +416,9 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
       dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves[i]);
-      vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], ne_num);
+
+      if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+         vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], ne_num);
 
       sv_refs[0] = sv_past ? sv_past[i] : NULL;
       sv_refs[1] = sv_future ? sv_future[i] : NULL;
@@ -516,11 +526,7 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
    unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
    struct pipe_sampler_view *idct_matrix;
 
-   /* TODO: Implement 422, 444 */
-   assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-   dec->empty_block_mask = &const_empty_block_mask_420;
-
-   if (!(idct_matrix = vl_idct_upload_matrix(dec->pipe)))
+   if (!(idct_matrix = vl_idct_upload_matrix(dec->pipe, sqrt(SCALE_FACTOR_16_TO_9))))
       goto error_idct_matrix;
 
    if (!vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height,
@@ -565,6 +571,7 @@ struct pipe_video_decoder *
 vl_create_mpeg12_decoder(struct pipe_video_context *context,
                          struct pipe_context *pipe,
                          enum pipe_video_profile profile,
+                         enum pipe_video_entrypoint entrypoint,
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height)
 {
@@ -580,6 +587,7 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
 
    dec->base.context = context;
    dec->base.profile = profile;
+   dec->base.entrypoint = entrypoint;
    dec->base.chroma_format = chroma_format;
    dec->base.width = width;
    dec->base.height = height;
@@ -598,10 +606,16 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    dec->base.width = align(width, MACROBLOCK_WIDTH);
    dec->base.height = align(height, MACROBLOCK_HEIGHT);
 
-   if (!init_idct(dec, dec->base.width, dec->base.height))
-      goto error_idct;
+   /* TODO: Implement 422, 444 */
+   assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+   dec->empty_block_mask = &const_empty_block_mask_420;
 
-   if (!vl_mpeg12_mc_renderer_init(&dec->mc, dec->pipe, dec->base.width, dec->base.height))
+   if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      if (!init_idct(dec, dec->base.width, dec->base.height))
+         goto error_idct;
+
+   if (!vl_mpeg12_mc_renderer_init(&dec->mc, dec->pipe, dec->base.width, dec->base.height,
+                                   entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 1.0f : SCALE_FACTOR_16_TO_9))
       goto error_mc;
 
    if (!init_pipe_state(dec))
@@ -613,8 +627,10 @@ error_pipe_state:
    vl_mpeg12_mc_renderer_cleanup(&dec->mc);
 
 error_mc:
-   vl_idct_cleanup(&dec->idct_y);
-   vl_idct_cleanup(&dec->idct_c);
+   if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      vl_idct_cleanup(&dec->idct_y);
+      vl_idct_cleanup(&dec->idct_c);
+   }
 
 error_idct:
    FREE(dec);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 69d649b179a..e90f8d3880b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -64,7 +64,7 @@ struct vl_mpeg12_buffer
    struct vl_vertex_buffer vertex_stream;
 
    struct pipe_video_buffer *idct_source;
-   struct pipe_video_buffer *idct_2_mc;
+   struct pipe_video_buffer *mc_source;
 
    union
    {
@@ -87,6 +87,7 @@ struct pipe_video_decoder *
 vl_create_mpeg12_decoder(struct pipe_video_context *context,
                          struct pipe_context *pipe,
                          enum pipe_video_profile profile,
+                         enum pipe_video_entrypoint entrypoint,
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 912dea3c57f..455aa52919e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -216,7 +216,7 @@ calc_field(struct ureg_program *shader)
 }
 
 static struct ureg_dst
-fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field)
+fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field, float scale)
 {
    struct ureg_src tc[2], sampler;
    struct ureg_dst texel, t_tc;
@@ -250,6 +250,9 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
    ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
 
+   if (scale != 1.0f)
+      ureg_MUL(shader, texel, ureg_src(texel), ureg_imm1f(shader, scale));
+
    ureg_release_temporary(shader, t_tc);
 
    return texel;
@@ -308,7 +311,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 }
 
 static void *
-create_frag_shader(struct vl_mpeg12_mc_renderer *r)
+create_frag_shader(struct vl_mpeg12_mc_renderer *r, float scale)
 {
    struct ureg_program *shader;
    struct ureg_dst result;
@@ -322,7 +325,7 @@ create_frag_shader(struct vl_mpeg12_mc_renderer *r)
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    field = calc_field(shader);
-   texel = fetch_ycbcr(r, shader, field);
+   texel = fetch_ycbcr(r, shader, field, scale);
 
    result = fetch_ref(shader, field);
 
@@ -424,7 +427,8 @@ bool
 vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                            struct pipe_context *pipe,
                            unsigned buffer_width,
-                           unsigned buffer_height)
+                           unsigned buffer_height,
+                           float scale)
 {
    struct pipe_resource tex_templ, *tex_dummy;
    struct pipe_sampler_view sampler_view;
@@ -445,7 +449,7 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (!renderer->vs)
       goto error_vs_shaders;
 
-   renderer->fs = create_frag_shader(renderer);
+   renderer->fs = create_frag_shader(renderer, scale);
    if (!renderer->fs)
       goto error_fs_shaders;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 052d7d6a30f..c3efda524a8 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -71,7 +71,8 @@ struct vl_mpeg12_mc_buffer
 bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
                                 struct pipe_context *pipe,
                                 unsigned picture_width,
-                                unsigned picture_height);
+                                unsigned picture_height,
+                                float scale);
 
 void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index e9d47983e1b..e3cc28ba476 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -514,6 +514,13 @@ enum pipe_video_profile
    PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH
 };
 
+enum pipe_video_entrypoint
+{
+   PIPE_VIDEO_ENTRYPOINT_UNKNOWN,
+   PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+   PIPE_VIDEO_ENTRYPOINT_IDCT,
+   PIPE_VIDEO_ENTRYPOINT_MC
+};
 
 /**
  * Composite query types
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 33c64baf7c4..21d0581226d 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -112,6 +112,7 @@ struct pipe_video_context
     */
    struct pipe_video_decoder *(*create_decoder)(struct pipe_video_context *context,
                                                 enum pipe_video_profile profile,
+                                                enum pipe_video_entrypoint entrypoint,
                                                 enum pipe_video_chroma_format chroma_format,
                                                 unsigned width, unsigned height);
 
@@ -137,6 +138,7 @@ struct pipe_video_decoder
    struct pipe_video_context *context;
 
    enum pipe_video_profile profile;
+   enum pipe_video_entrypoint entrypoint;
    enum pipe_video_chroma_format chroma_format;
    unsigned width;
    unsigned height;
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index 2690f8046a8..6f136f2b121 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -209,7 +209,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Unsupported chroma format.\n");
       return BadImplementation;
    }
-   if (mc_type != (XVMC_IDCT | XVMC_MOCOMP | XVMC_MPEG_2)) {
+   if ((mc_type & ~XVMC_IDCT) != (XVMC_MOCOMP | XVMC_MPEG_2)) {
       XVMC_MSG(XVMC_ERR, "[XvMC] Cannot decode requested surface type. Non-MPEG2/Mocomp/iDCT acceleration unsupported.\n");
       return BadImplementation;
    }
@@ -241,6 +241,9 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
 
    context_priv->decoder = vctx->vpipe->create_decoder(vctx->vpipe,
                                                        ProfileToPipe(mc_type),
+                                                       (mc_type & XVMC_IDCT) ?
+                                                          PIPE_VIDEO_ENTRYPOINT_IDCT :
+                                                          PIPE_VIDEO_ENTRYPOINT_MC,
                                                        FormatToPipe(chroma_format),
                                                        width, height);
 
-- 
cgit v1.2.3


From 30c4a07310e880d6ccb2f66efc30e123b8ae60e2 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 7 Apr 2011 22:59:48 +0200
Subject: [g3dvl] create composite buffer with USAGE_STREAM

Also add PIPE_TRANSFER_DONTBLOCK to mapping options
---
 src/gallium/auxiliary/vl/vl_compositor.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 46579a88ba7..b601f32d453 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -286,12 +286,11 @@ init_buffers(struct vl_compositor *c)
     */
    c->vertex_buf.stride = sizeof(struct vertex4f);
    c->vertex_buf.buffer_offset = 0;
-   /* XXX: Create with DYNAMIC or STREAM */
    c->vertex_buf.buffer = pipe_buffer_create
    (
       c->pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
-      PIPE_USAGE_STATIC,
+      PIPE_USAGE_STREAM,
       sizeof(struct vertex4f) * (VL_COMPOSITOR_MAX_LAYERS + 1) * 4
    );
 
@@ -382,7 +381,7 @@ gen_vertex_data(struct vl_compositor *c, struct pipe_video_rect *dst_rect, struc
    assert(dst_rect);
 
    vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
-                        PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+                        PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | PIPE_TRANSFER_DONTBLOCK,
                         &buf_transfer);
 
    if (!vb)
-- 
cgit v1.2.3


From 19402275fb36fc8cb5afaab63b224487e00313f9 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 7 Apr 2011 23:04:10 +0200
Subject: [g3dvl] stop waiting for reply of DRI2CopyRegion

---
 src/gallium/winsys/g3dvl/dri/dri2.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/gallium/winsys/g3dvl/dri/dri2.c b/src/gallium/winsys/g3dvl/dri/dri2.c
index 23fb7780882..12ed6d0ab74 100644
--- a/src/gallium/winsys/g3dvl/dri/dri2.c
+++ b/src/gallium/winsys/g3dvl/dri/dri2.c
@@ -213,7 +213,7 @@ DRI2QueryVersion(Display * dpy, int *major, int *minor)
 	   nevents = 2;
 	   break;
    }
-	
+
    for (i = 0; i < nevents; i++) {
        XESetWireToEvent (dpy, info->codes->first_event + i, DRI2WireToEvent);
        XESetEventToWire (dpy, info->codes->first_event + i, DRI2EventToWire);
@@ -466,7 +466,6 @@ DRI2CopyRegion(Display * dpy, XID drawable, XserverRegion region,
 {
    XExtDisplayInfo *info = DRI2FindDisplay(dpy);
    xDRI2CopyRegionReq *req;
-   xDRI2CopyRegionReply rep;
 
    XextSimpleCheckExtension(dpy, info, dri2ExtensionName);
 
@@ -479,8 +478,6 @@ DRI2CopyRegion(Display * dpy, XID drawable, XserverRegion region,
    req->dest = dest;
    req->src = src;
 
-   _XReply(dpy, (xReply *) & rep, 0, xFalse);
-
    UnlockDisplay(dpy);
    SyncHandle();
 }
-- 
cgit v1.2.3


From b98b58c76a2560b1db603e62f96c6641cedfa8e8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Apr 2011 19:21:13 +0200
Subject: vdpau: implement output surface creation

---
 src/gallium/state_trackers/vdpau/output.c        | 50 ++++++++++++++++++++++--
 src/gallium/state_trackers/vdpau/vdpau_private.h |  5 +--
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index 90c66481d0a..e95f333b02a 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -1,6 +1,7 @@
 /**************************************************************************
  *
  * Copyright 2010 Thomas Balling Sørensen.
+ * Copyright 2011 Christian König.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,17 +26,24 @@
  *
  **************************************************************************/
 
-#include "vdpau_private.h"
 #include <vdpau/vdpau.h>
+
 #include <util/u_debug.h>
 #include <util/u_memory.h>
 
+#include "vdpau_private.h"
+
 VdpStatus
 vlVdpOutputSurfaceCreate(VdpDevice device,
                          VdpRGBAFormat rgba_format,
                          uint32_t width, uint32_t height,
                          VdpOutputSurface  *surface)
 {
+   struct pipe_video_context *context;
+   struct pipe_resource res_tmpl, *res;
+   struct pipe_sampler_view sv_templ;
+   struct pipe_surface surf_templ;
+
    vlVdpOutputSurface *vlsurface = NULL;
 
    debug_printf("[VDPAU] Creating output surface\n");
@@ -46,13 +54,47 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
 
+   context = dev->context->vpipe;
+   if (!context)
+      return VDP_STATUS_INVALID_HANDLE;
+
    vlsurface = CALLOC(1, sizeof(vlVdpOutputSurface));
    if (!vlsurface)
       return VDP_STATUS_RESOURCES;
 
-   vlsurface->width = width;
-   vlsurface->height = height;
-   vlsurface->format = FormatRGBAToPipe(rgba_format);
+   memset(&res_tmpl, 0, sizeof(res_tmpl));
+
+   res_tmpl.target = PIPE_TEXTURE_2D;
+   res_tmpl.format = FormatRGBAToPipe(rgba_format);
+   res_tmpl.width0 = width;
+   res_tmpl.height0 = height;
+   res_tmpl.depth0 = 1;
+   res_tmpl.array_size = 1;
+   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   res_tmpl.usage = PIPE_USAGE_STATIC;
+
+   res = context->screen->resource_create(context->screen, &res_tmpl);
+   if (!res) {
+      FREE(dev);
+      return VDP_STATUS_ERROR;
+   }
+
+   memset(&sv_templ, 0, sizeof(sv_templ));
+   u_sampler_view_default_template(&sv_templ, res, res->format);
+   vlsurface->sampler_view = context->create_sampler_view(context, res, &sv_templ);
+   if (!vlsurface->sampler_view) {
+      FREE(dev);
+      return VDP_STATUS_ERROR;
+   }
+
+   memset(&surf_templ, 0, sizeof(surf_templ));
+   surf_templ.format = res->format;
+   surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   vlsurface->surface = context->create_surface(context, res, &surf_templ);
+   if (!vlsurface->surface) {
+      FREE(dev);
+      return VDP_STATUS_ERROR;
+   }
 
    *surface = vlAddDataHTAB(vlsurface);
    if (*surface == 0) {
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index d326784c951..dca755eb8d4 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -200,9 +200,8 @@ typedef struct
 typedef struct
 {
    vlVdpDevice *device;
-   uint32_t width;
-   uint32_t height;
-   enum pipe_format format;
+   struct pipe_surface *surface;
+   struct pipe_sampler_view *sampler_view;
 } vlVdpOutputSurface;
 
 typedef struct
-- 
cgit v1.2.3


From 6710e690f6ccd2c917d6a39be7d55a037470fccb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Apr 2011 20:03:35 +0200
Subject: vdpau: add compositor to presentation queue

---
 src/gallium/state_trackers/vdpau/presentation.c  | 19 ++++++++++++++++---
 src/gallium/state_trackers/vdpau/vdpau_private.h |  2 +-
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index d4f67379ed2..fadc57e70c0 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -41,9 +41,11 @@ vlVdpPresentationQueueCreate(VdpDevice device,
                              VdpPresentationQueueTarget presentation_queue_target,
                              VdpPresentationQueue *presentation_queue)
 {
-   debug_printf("[VDPAU] Creating PresentationQueue\n");
-   VdpStatus ret;
    vlVdpPresentationQueue *pq = NULL;
+   struct pipe_video_context *context;
+   VdpStatus ret;
+
+   _debug_printf("[VDPAU] Creating PresentationQueue\n");
 
    if (!presentation_queue)
       return VDP_STATUS_INVALID_POINTER;
@@ -59,10 +61,19 @@ vlVdpPresentationQueueCreate(VdpDevice device,
    if (dev != pqt->device)
       return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
 
+   context = dev->context->vpipe;
+
    pq = CALLOC(1, sizeof(vlVdpPresentationQueue));
    if (!pq)
       return VDP_STATUS_RESOURCES;
 
+   pq->device = dev;
+   pq->compositor = context->create_compositor(context);
+   if (!pq->compositor) {
+      ret = VDP_STATUS_ERROR;
+      goto no_compositor;
+   }
+
    *presentation_queue = vlAddDataHTAB(pq);
    if (*presentation_queue == 0) {
       ret = VDP_STATUS_ERROR;
@@ -71,6 +82,7 @@ vlVdpPresentationQueueCreate(VdpDevice device,
 
    return VDP_STATUS_OK;
 no_handle:
+no_compositor:
    FREE(pq);
    return ret;
 }
@@ -129,7 +141,8 @@ vlVdpPresentationQueueBlockUntilSurfaceIdle(VdpPresentationQueue presentation_qu
    if (!first_presentation_time)
       return VDP_STATUS_INVALID_POINTER;
 
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   //return VDP_STATUS_NO_IMPLEMENTATION;
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index dca755eb8d4..f1f94ea92ac 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -183,7 +183,7 @@ typedef struct
 typedef struct
 {
    vlVdpDevice *device;
-   Drawable drawable;
+   struct pipe_video_compositor *compositor;
 } vlVdpPresentationQueue;
 
 typedef struct
-- 
cgit v1.2.3


From 574ffb440dbd878d51fc9b9794a6396cbe6f75bb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Apr 2011 20:12:30 +0200
Subject: vdpau: add compositor to mixer

---
 src/gallium/state_trackers/vdpau/mixer.c         | 7 ++++++-
 src/gallium/state_trackers/vdpau/vdpau_private.h | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index 808ff9e9ce8..86ac099a7d8 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -39,8 +39,9 @@ vlVdpVideoMixerCreate(VdpDevice device,
                       void const *const *parameter_values,
                       VdpVideoMixer *mixer)
 {
-   VdpStatus ret;
    vlVdpVideoMixer *vmixer = NULL;
+   struct pipe_video_context *context;
+   VdpStatus ret;
 
    debug_printf("[VDPAU] Creating VideoMixer\n");
 
@@ -48,11 +49,15 @@ vlVdpVideoMixerCreate(VdpDevice device,
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
 
+   context = dev->context->vpipe;
+
    vmixer = CALLOC(1, sizeof(vlVdpVideoMixer));
    if (!vmixer)
       return VDP_STATUS_RESOURCES;
 
    vmixer->device = dev;
+   vmixer->compositor = context->create_compositor(context);
+
    /*
     * TODO: Handle features and parameters
     * */
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index f1f94ea92ac..25f289aa726 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -189,6 +189,7 @@ typedef struct
 typedef struct
 {
    vlVdpDevice *device;
+   struct pipe_video_compositor *compositor;
 } vlVdpVideoMixer;
 
 typedef struct
-- 
cgit v1.2.3


From 255033e4819b096491dd987c3ca4d8ee32a7cdb6 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Apr 2011 22:07:30 +0200
Subject: [g3dvl] use scissor to handle compositor dst_area

---
 src/gallium/auxiliary/vl/vl_compositor.c | 45 ++++++++++++++++++++++++--------
 src/gallium/auxiliary/vl/vl_compositor.h |  1 +
 2 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index b601f32d453..1278faea4c9 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -216,6 +216,7 @@ static void cleanup_shaders(struct vl_compositor *c)
 static bool
 init_pipe_state(struct vl_compositor *c)
 {
+   struct pipe_rasterizer_state rast;
    struct pipe_sampler_state sampler;
    struct pipe_blend_state blend;
 
@@ -263,6 +264,21 @@ init_pipe_state(struct vl_compositor *c)
    blend.dither = 0;
    c->blend = c->pipe->create_blend_state(c->pipe, &blend);
 
+   memset(&rast, 0, sizeof rast);
+   rast.flatshade = 1;
+   rast.front_ccw = 1;
+   rast.cull_face = PIPE_FACE_NONE;
+   rast.fill_back = PIPE_POLYGON_MODE_FILL;
+   rast.fill_front = PIPE_POLYGON_MODE_FILL;
+   rast.scissor = 1;
+   rast.line_width = 1;
+   rast.point_size_per_vertex = 1;
+   rast.offset_units = 1;
+   rast.offset_scale = 1;
+   rast.gl_rasterization_rules = 1;
+
+   c->rast = c->pipe->create_rasterizer_state(c->pipe, &rast);
+
    return true;
 }
 
@@ -272,6 +288,7 @@ static void cleanup_pipe_state(struct vl_compositor *c)
 
    c->pipe->delete_sampler_state(c->pipe, c->sampler);
    c->pipe->delete_blend_state(c->pipe, c->blend);
+   c->pipe->delete_rasterizer_state(c->pipe, c->rast);
 }
 
 static bool
@@ -371,14 +388,13 @@ gen_rect_verts(struct vertex4f *vb,
 }
 
 static void
-gen_vertex_data(struct vl_compositor *c, struct pipe_video_rect *dst_rect, struct vertex2f *dst_inv_size)
+gen_vertex_data(struct vl_compositor *c)
 {
    struct vertex4f *vb;
    struct pipe_transfer *buf_transfer;
    unsigned i;
 
    assert(c);
-   assert(dst_rect);
 
    vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
                         PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | PIPE_TRANSFER_DONTBLOCK,
@@ -392,10 +408,7 @@ gen_vertex_data(struct vl_compositor *c, struct pipe_video_rect *dst_rect, struc
          struct pipe_sampler_view *sv = c->layers[i].sampler_views[0];
          struct vertex2f src_inv_size = {1.0f / sv->texture->width0, 1.0f / sv->texture->height0};
 
-         if (&c->layers[i].fs == c->fs_video_buffer)
-            gen_rect_verts(vb, &c->layers[i].src_rect, &src_inv_size, dst_rect, dst_inv_size);
-         else
-            gen_rect_verts(vb, &c->layers[i].src_rect, &src_inv_size, &c->layers[i].dst_rect, &src_inv_size);
+         gen_rect_verts(vb, &c->layers[i].src_rect, &src_inv_size, &c->layers[i].dst_rect, &src_inv_size);
 
          vb += 4;
       }
@@ -552,12 +565,11 @@ vl_compositor_render(struct pipe_video_compositor *compositor,
                      struct pipe_fence_handle      **fence)
 {
    struct vl_compositor *c = (struct vl_compositor *)compositor;
-   struct vertex2f dst_inv_size;
+   struct pipe_scissor_state scissor;
    void *samplers[3];
 
    assert(compositor);
    assert(dst_surface);
-   assert(dst_area);
 
    c->fb_state.width = dst_surface->width;
    c->fb_state.height = dst_surface->height;
@@ -566,13 +578,23 @@ vl_compositor_render(struct pipe_video_compositor *compositor,
    c->viewport.scale[0] = dst_surface->width;
    c->viewport.scale[1] = dst_surface->height;
 
-   dst_inv_size.x = 1.0f / dst_surface->width;
-   dst_inv_size.y = 1.0f / dst_surface->height;
+   if (dst_area) {
+      scissor.minx = dst_area->x;
+      scissor.miny = dst_area->y;
+      scissor.maxx = dst_area->x + dst_area->w;
+      scissor.maxy = dst_area->y + dst_area->h;
+   } else {
+      scissor.minx = 0;
+      scissor.miny = 0;
+      scissor.maxx = dst_surface->width;
+      scissor.maxy = dst_surface->height;
+   }
 
    samplers[0] = samplers[1] = samplers[2] = c->sampler;
 
-   gen_vertex_data(c, dst_area, &dst_inv_size);
+   gen_vertex_data(c);
 
+   c->pipe->set_scissor_state(c->pipe, &scissor);
    c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
    c->pipe->set_viewport_state(c->pipe, &c->viewport);
    c->pipe->bind_fragment_sampler_states(c->pipe, 3, &samplers[0]);
@@ -581,6 +603,7 @@ vl_compositor_render(struct pipe_video_compositor *compositor,
    c->pipe->bind_vertex_elements_state(c->pipe, c->vertex_elems_state);
    c->pipe->set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, 0, c->csc_matrix);
    c->pipe->bind_blend_state(c->pipe, c->blend);
+   c->pipe->bind_rasterizer_state(c->pipe, c->rast);
 
    draw_layers(c);
 
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 0e4badb65d2..c7da533c988 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -56,6 +56,7 @@ struct vl_compositor
 
    void *sampler;
    void *blend;
+   void *rast;
    void *vertex_elems_state;
 
    void *vs;
-- 
cgit v1.2.3


From f3ead63e7023f61557cb92be30cae6fe9efb280a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Apr 2011 22:13:46 +0200
Subject: vdpau: get at least the very basic mixer functions working

---
 src/gallium/state_trackers/vdpau/mixer.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index 86ac099a7d8..2fe0f1ca6f4 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -111,11 +111,28 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
                                 uint32_t layer_count,
                                 VdpLayer const *layers)
 {
-   if (!(background_source_rect && video_surface_past && video_surface_future &&
-         video_source_rect && destination_rect && destination_video_rect && layers))
-      return VDP_STATUS_INVALID_POINTER;
+   vlVdpVideoMixer *vmixer;
+   vlVdpSurface *surf;
+   vlVdpOutputSurface *dst;
+
+   vmixer = vlGetDataHTAB(mixer);
+   if (!vmixer)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   surf = vlGetDataHTAB(video_surface_current);
+   if (!surf)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   dst = vlGetDataHTAB(destination_surface);
+   if (!dst)
+      return VDP_STATUS_INVALID_HANDLE;
 
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   vmixer->compositor->clear_layers(vmixer->compositor);
+   vmixer->compositor->set_buffer_layer(vmixer->compositor, 0, surf->video_buffer, NULL, NULL);
+   vmixer->compositor->render_picture(vmixer->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
+                                      dst->surface, NULL, NULL);
+
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
-- 
cgit v1.2.3


From cae77aa80b3629a147ce3ae4526646014efae595 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 9 Apr 2011 01:32:25 +0200
Subject: vdpau: Implement basic output functionality

Even with totally wrong color space conversion
we finally see a picture with VDPAU. Yeah!
---
 src/gallium/state_trackers/vdpau/presentation.c  | 32 +++++++++++++++++++++++-
 src/gallium/state_trackers/vdpau/surface.c       |  2 +-
 src/gallium/state_trackers/vdpau/vdpau_private.h |  1 +
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index fadc57e70c0..fdd0144fe22 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -69,6 +69,7 @@ vlVdpPresentationQueueCreate(VdpDevice device,
 
    pq->device = dev;
    pq->compositor = context->create_compositor(context);
+   pq->drawable = pqt->drawable;
    if (!pq->compositor) {
       ret = VDP_STATUS_ERROR;
       goto no_compositor;
@@ -130,7 +131,36 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
                               uint32_t clip_height,
                               VdpTime  earliest_presentation_time)
 {
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   vlVdpPresentationQueue *pq;
+   vlVdpOutputSurface *surf;
+   struct pipe_surface *drawable_surface;
+
+   pq = vlGetDataHTAB(presentation_queue);
+   if (!pq)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   drawable_surface = vl_drawable_surface_get(pq->device->context, pq->drawable);
+   if (!drawable_surface)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   surf = vlGetDataHTAB(surface);
+   if (!surf)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   pq->compositor->clear_layers(pq->compositor);
+   pq->compositor->set_rgba_layer(pq->compositor, 0, surf->sampler_view, NULL, NULL);
+   pq->compositor->render_picture(pq->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
+                                  drawable_surface, NULL, NULL);
+
+   pq->device->context->vpipe->screen->flush_frontbuffer
+   (
+      pq->device->context->vpipe->screen,
+      drawable_surface->texture,
+      0, 0,
+      vl_contextprivate_get(pq->device->context, drawable_surface)
+   );
+
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index 496f647a8d0..f0aafae79b5 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -182,7 +182,7 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
 
    for (i = 0; i < 3; ++i) { //TODO put nr of planes into util format
       struct pipe_sampler_view *sv = sampler_views[i];
-      struct pipe_box dst_box = { 0, 0, sv->texture->width0, sv->texture->height0 };
+      struct pipe_box dst_box = { 0, 0, 0, sv->texture->width0, sv->texture->height0, 1 };
       context->upload_sampler(context, sv, &dst_box, source_data[i], source_pitches[i], 0, 0);
    }
 
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 25f289aa726..bd77507567f 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -183,6 +183,7 @@ typedef struct
 typedef struct
 {
    vlVdpDevice *device;
+   Drawable drawable;
    struct pipe_video_compositor *compositor;
 } vlVdpPresentationQueue;
 
-- 
cgit v1.2.3


From 44477ac489a34402d9d3314857fd2358a5e58e48 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 9 Apr 2011 12:01:29 +0200
Subject: [g3dvl] give each mv an individual weight

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 18 +++++++++-------
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     | 27 +++++++++++++-----------
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 455aa52919e..029c85137be 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -136,14 +136,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
-   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_Z),
-            ureg_scalar(flags, TGSI_SWIZZLE_W),
-            ureg_imm1f(shader, 0.5f));
-
    for (i = 0; i < 2; ++i)
       for (j = 0; j < 2; ++j) {
          ureg_MAD(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_XY), mv_scale, vmv[i][j], ureg_src(t_vpos));
-         ureg_MOV(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_Z), ureg_src(t_vpos));
+         ureg_MOV(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_Z), ureg_scalar(flags, TGSI_SWIZZLE_Z + i));
       }
 
    ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
@@ -162,7 +158,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
       vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
    ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Z),
-            ureg_scalar(flags, TGSI_SWIZZLE_Z));
+            ureg_scalar(flags, TGSI_SWIZZLE_Y));
 
    ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_X), &label);
 
@@ -297,9 +293,15 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
       ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
       ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
 
-      ureg_LRP(shader, result,
+      ureg_LRP(shader, ref[0],
                ureg_scalar(tc[0][0], TGSI_SWIZZLE_Z),
-               ureg_src(ref[1]), ureg_src(ref[0]));
+               ureg_src(ref[0]), ureg_imm1f(shader, 0.0f));
+
+      ureg_LRP(shader, ref[1],
+               ureg_scalar(tc[1][0], TGSI_SWIZZLE_Z),
+               ureg_src(ref[1]), ureg_imm1f(shader, 0.0f));
+
+      ureg_ADD(shader, result, ureg_src(ref[0]), ureg_src(ref[1]));
 
    ureg_fixup_label(shader, intra_label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 1094b76ec0d..f3e728c65a6 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -33,11 +33,10 @@
 struct vl_vertex_stream
 {
    struct vertex2s pos;
-   int8_t eb[3][2][2];
-   int8_t dct_type_field;
-   int8_t mo_type_frame;
-   int8_t mb_type_intra;
-   int8_t mv_wheights;
+   uint8_t eb[3][2][2];
+   uint8_t dct_type_field;
+   uint8_t mb_type_intra;
+   uint8_t mv_wheights[2];
    struct vertex2s mv[4];
 };
 
@@ -143,11 +142,11 @@ vl_vb_get_elems_state(struct pipe_context *pipe, int component)
 
    /* empty block element of selected component */
    vertex_elems[VS_I_EB].src_offset = 4 + component * 4;
-   vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+   vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
 
    /* flags */
    vertex_elems[VS_I_FLAGS].src_offset = 16;
-   vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+   vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_UNORM;
 
    /* motion vector 0 TOP element */
    vertex_elems[VS_I_MV0_TOP].src_format = PIPE_FORMAT_R16G16_SSCALED;
@@ -280,23 +279,27 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *
             stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]);
 
    stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
-   stream->mo_type_frame = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
+   //stream->mo_type_frame = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
    stream->mb_type_intra = mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA;
    switch (mb->mb_type) {
       case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
-         stream->mv_wheights = 0;
+         stream->mv_wheights[0] = 255;
+         stream->mv_wheights[1] = 0;
          break;
 
       case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-         stream->mv_wheights = 1;
+         stream->mv_wheights[0] = 127;
+         stream->mv_wheights[1] = 127;
          break;
 
       case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-         stream->mv_wheights = 2;
+         stream->mv_wheights[0] = 0;
+         stream->mv_wheights[1] = 255;
          break;
 
       default:
-         stream->mv_wheights = 0;
+         stream->mv_wheights[0] = 0;
+         stream->mv_wheights[1] = 0;
    }
 
    get_motion_vectors(mb, stream->mv);
-- 
cgit v1.2.3


From 7a5390b06fea99f85ab47d40d8dc40e40e0f2ab8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 9 Apr 2011 20:38:20 +0200
Subject: [g3dvl] make mv weights a public interface

---
 src/gallium/auxiliary/vl/vl_vertex_buffers.c   | 75 +++++++-------------------
 src/gallium/include/pipe/p_video_state.h       | 13 +----
 src/gallium/state_trackers/xorg/xvmc/surface.c | 72 +++++++++++++------------
 3 files changed, 61 insertions(+), 99 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index f3e728c65a6..c834042e8ae 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -211,46 +211,30 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 static void
 get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
 {
-   if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BI ||
-       mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_FWD) {
+   if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+      mv[0].x = mv[1].x = mb->mv[0].top.x;
+      mv[0].y = mv[1].y = mb->mv[0].top.y;
+      mv[2].x = mv[3].x = mb->mv[1].top.x;
+      mv[2].y = mv[3].y = mb->mv[1].top.y;
 
-      if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-         mv[0].x = mv[1].x = mb->mv[0].top.x;
-         mv[0].y = mv[1].y = mb->mv[0].top.y;
-
-      } else {
-         mv[0].x = mb->mv[0].top.x;
-         mv[0].y = mb->mv[0].top.y - (mb->mv[0].top.y % 4);
-
-         mv[1].x = mb->mv[0].bottom.x;
-         mv[1].y = mb->mv[0].bottom.y - (mb->mv[0].bottom.y % 4);
-
-         if (mb->mv[0].top.field_select) mv[0].y += 2;
-         if (!mb->mv[0].bottom.field_select) mv[1].y -= 2;
-      }
    } else {
-      mv[0].x = mv[0].y = mv[1].x = mv[1].y = 0x8000;
-   }
+      mv[0].x = mb->mv[0].top.x;
+      mv[0].y = mb->mv[0].top.y - (mb->mv[0].top.y % 4);
 
-   if (mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BI ||
-       mb->mb_type == PIPE_MPEG12_MACROBLOCK_TYPE_BKWD) {
+      mv[1].x = mb->mv[0].bottom.x;
+      mv[1].y = mb->mv[0].bottom.y - (mb->mv[0].bottom.y % 4);
 
-      if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-         mv[2].x = mv[3].x = mb->mv[1].top.x;
-         mv[2].y = mv[3].y = mb->mv[1].top.y;
+      if (mb->mv[0].top.field_select) mv[0].y += 2;
+      if (!mb->mv[0].bottom.field_select) mv[1].y -= 2;
 
-      } else {
-         mv[2].x = mb->mv[1].top.x;
-         mv[2].y = mb->mv[1].top.y - (mb->mv[1].top.y % 4);
+      mv[2].x = mb->mv[1].top.x;
+      mv[2].y = mb->mv[1].top.y - (mb->mv[1].top.y % 4);
 
-         mv[3].x = mb->mv[1].bottom.x;
-         mv[3].y = mb->mv[1].bottom.y - (mb->mv[1].bottom.y % 4);
+      mv[3].x = mb->mv[1].bottom.x;
+      mv[3].y = mb->mv[1].bottom.y - (mb->mv[1].bottom.y % 4);
 
-         if (mb->mv[1].top.field_select) mv[2].y += 2;
-         if (!mb->mv[1].bottom.field_select) mv[3].y -= 2;
-      }
-   } else {
-      mv[2].x = mv[2].y = mv[3].x = mv[3].y = 0x8000;
+      if (mb->mv[1].top.field_select) mv[2].y += 2;
+      if (!mb->mv[1].bottom.field_select) mv[3].y -= 2;
    }
 }
 
@@ -279,29 +263,10 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *
             stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]);
 
    stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
-   //stream->mo_type_frame = mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME;
-   stream->mb_type_intra = mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA;
-   switch (mb->mb_type) {
-      case PIPE_MPEG12_MACROBLOCK_TYPE_FWD:
-         stream->mv_wheights[0] = 255;
-         stream->mv_wheights[1] = 0;
-         break;
-
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BI:
-         stream->mv_wheights[0] = 127;
-         stream->mv_wheights[1] = 127;
-         break;
-
-      case PIPE_MPEG12_MACROBLOCK_TYPE_BKWD:
-         stream->mv_wheights[0] = 0;
-         stream->mv_wheights[1] = 255;
-         break;
-
-      default:
-         stream->mv_wheights[0] = 0;
-         stream->mv_wheights[1] = 0;
-   }
+   stream->mb_type_intra = !mb->dct_intra;
 
+   stream->mv_wheights[0] = mb->mv[0].wheight;
+   stream->mv_wheights[1] = mb->mv[1].wheight;
    get_motion_vectors(mb, stream->mv);
 }
 
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index c620472283d..72a27938847 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -50,16 +50,6 @@ enum pipe_mpeg12_picture_type
    PIPE_MPEG12_PICTURE_TYPE_FRAME
 };
 
-enum pipe_mpeg12_macroblock_type
-{
-   PIPE_MPEG12_MACROBLOCK_TYPE_INTRA,
-   PIPE_MPEG12_MACROBLOCK_TYPE_FWD,
-   PIPE_MPEG12_MACROBLOCK_TYPE_BKWD,
-   PIPE_MPEG12_MACROBLOCK_TYPE_BI,
-
-   PIPE_MPEG12_MACROBLOCK_NUM_TYPES
-};
-
 enum pipe_mpeg12_motion_type
 {
    PIPE_MPEG12_MOTION_TYPE_FIELD,
@@ -91,10 +81,11 @@ struct pipe_mpeg12_macroblock
 
    unsigned mbx;
    unsigned mby;
-   enum pipe_mpeg12_macroblock_type mb_type;
    enum pipe_mpeg12_motion_type mo_type;
+   bool dct_intra;
    enum pipe_mpeg12_dct_type dct_type;
    struct {
+      unsigned wheight:8;
       struct pipe_mpeg12_motionvector top, bottom;
    } mv[2];
    unsigned cbp;
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index ba55d9ddb22..9585fffffc6 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -37,24 +37,6 @@
 #include <util/u_math.h>
 #include "xvmc_private.h"
 
-static enum pipe_mpeg12_macroblock_type TypeToPipe(int xvmc_mb_type)
-{
-   if (xvmc_mb_type & XVMC_MB_TYPE_INTRA)
-      return PIPE_MPEG12_MACROBLOCK_TYPE_INTRA;
-   if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == XVMC_MB_TYPE_MOTION_FORWARD)
-      return PIPE_MPEG12_MACROBLOCK_TYPE_FWD;
-   if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == XVMC_MB_TYPE_MOTION_BACKWARD)
-      return PIPE_MPEG12_MACROBLOCK_TYPE_BKWD;
-   if ((xvmc_mb_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) == (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD))
-      return PIPE_MPEG12_MACROBLOCK_TYPE_BI;
-
-   assert(0);
-
-   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized mb type 0x%08X.\n", xvmc_mb_type);
-
-   return -1;
-}
-
 static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
 {
    switch (xvmc_pic) {
@@ -73,21 +55,21 @@ static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
    return -1;
 }
 
-static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, unsigned int xvmc_picture_structure)
+static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, unsigned xvmc_picture_structure)
 {
    switch (xvmc_motion_type) {
-      case XVMC_PREDICTION_FRAME:
-         if (xvmc_picture_structure == XVMC_FRAME_PICTURE)
-            return PIPE_MPEG12_MOTION_TYPE_FRAME;
-         else
-            return PIPE_MPEG12_MOTION_TYPE_16x8;
-         break;
-      case XVMC_PREDICTION_FIELD:
-         return PIPE_MPEG12_MOTION_TYPE_FIELD;
-      case XVMC_PREDICTION_DUAL_PRIME:
-         return PIPE_MPEG12_MOTION_TYPE_DUALPRIME;
-      default:
-         assert(0);
+   case XVMC_PREDICTION_FRAME:
+      if (xvmc_picture_structure == XVMC_FRAME_PICTURE)
+         return PIPE_MPEG12_MOTION_TYPE_FRAME;
+      else
+         return PIPE_MPEG12_MOTION_TYPE_16x8;
+      break;
+
+   case XVMC_PREDICTION_FIELD:
+      return PIPE_MPEG12_MOTION_TYPE_FIELD;
+
+   case XVMC_PREDICTION_DUAL_PRIME:
+      return PIPE_MPEG12_MOTION_TYPE_DUALPRIME;
    }
 
    XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized motion type 0x%08X (with picture structure 0x%08X).\n", xvmc_motion_type, xvmc_picture_structure);
@@ -118,15 +100,39 @@ MacroBlocksToPipe(struct pipe_screen *screen,
       mb->base.codec = PIPE_VIDEO_CODEC_MPEG12;
       mb->mbx = xvmc_mb->x;
       mb->mby = xvmc_mb->y;
-      mb->mb_type = TypeToPipe(xvmc_mb->macroblock_type);
-      if (mb->mb_type != PIPE_MPEG12_MACROBLOCK_TYPE_INTRA)
+
+      if (!xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA)
          mb->mo_type = MotionToPipe(xvmc_mb->motion_type, xvmc_picture_structure);
       /* Get rid of Valgrind 'undefined' warnings */
       else
          mb->mo_type = -1;
+
+      mb->dct_intra = xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA;
       mb->dct_type = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ?
          PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
 
+      switch (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) {
+      case XVMC_MB_TYPE_MOTION_FORWARD:
+         mb->mv[0].wheight = 255;
+         mb->mv[1].wheight = 0;
+         break;
+
+      case (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD):
+         mb->mv[0].wheight = 127;
+         mb->mv[1].wheight = 127;
+         break;
+
+      case XVMC_MB_TYPE_MOTION_BACKWARD:
+         mb->mv[0].wheight = 0;
+         mb->mv[1].wheight = 255;
+         break;
+
+      default:
+         mb->mv[0].wheight = 0;
+         mb->mv[1].wheight = 0;
+         break;
+      }
+
       for (j = 0; j < 2; ++j) {
          mb->mv[j].top.x = xvmc_mb->PMV[0][j][0];
          mb->mv[j].top.y = xvmc_mb->PMV[0][j][1];
-- 
cgit v1.2.3


From 816d820b7de50827b9597b99823607cbab9a1ac6 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 9 Apr 2011 20:46:24 +0200
Subject: xvmc: add a workaround for xine's xxmc video out plugin

---
 src/gallium/state_trackers/xorg/xvmc/surface.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 9585fffffc6..68d003e470a 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -101,7 +101,7 @@ MacroBlocksToPipe(struct pipe_screen *screen,
       mb->mbx = xvmc_mb->x;
       mb->mby = xvmc_mb->y;
 
-      if (!xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA)
+      if (!(xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA))
          mb->mo_type = MotionToPipe(xvmc_mb->motion_type, xvmc_picture_structure);
       /* Get rid of Valgrind 'undefined' warnings */
       else
@@ -128,8 +128,14 @@ MacroBlocksToPipe(struct pipe_screen *screen,
          break;
 
       default:
-         mb->mv[0].wheight = 0;
-         mb->mv[1].wheight = 0;
+         /* workaround for xines xxmc video out plugin */
+         if (!(xvmc_mb->macroblock_type & ~XVMC_MB_TYPE_PATTERN)) {
+            mb->mv[0].wheight = 255;
+            mb->mv[1].wheight = 0;
+         } else {
+            mb->mv[0].wheight = 0;
+            mb->mv[1].wheight = 0;
+         }
          break;
       }
 
-- 
cgit v1.2.3


From 8b0a9cc62c36bb48d2d7b488787eb2966bcbf7f2 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 10 Apr 2011 00:33:36 +0200
Subject: [g3dvl] get softpipe to work again

---
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index ab52be38c75..1a67e2436e4 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -25,15 +25,19 @@
  *
  **************************************************************************/
 
-#include <vl_winsys.h>
 #include <X11/Xlibint.h>
-#include <state_tracker/xlib_sw_winsys.h>
+
+#include <pipe/p_state.h>
+#include <pipe/p_video_context.h>
+
 #include <util/u_memory.h>
 #include <util/u_format.h>
 #include <util/u_inlines.h>
+
+#include <state_tracker/xlib_sw_winsys.h>
 #include <softpipe/sp_public.h>
-#include <pipe/p_state.h>
-#include <pipe/p_video_context.h>
+
+#include <vl_winsys.h>
 
 struct vl_xsp_screen
 {
@@ -93,6 +97,8 @@ vl_drawable_surface_get(struct vl_context *vctx, Drawable drawable)
       return NULL;
 
    memset(&surf_template, 0, sizeof(surf_template));
+   surf_template.format = templat.format;
+   surf_template.usage = PIPE_BIND_RENDER_TARGET;
    xsp_screen->drawable_surface = vctx->vpipe->create_surface(vctx->vpipe, drawable_tex,
                                                               &surf_template);
    pipe_resource_reference(&drawable_tex, NULL);
@@ -164,22 +170,15 @@ void vl_screen_destroy(struct vl_screen *vscreen)
 }
 
 struct vl_context*
-vl_video_create(struct vl_screen *vscreen,
-                enum pipe_video_profile profile,
-                enum pipe_video_chroma_format chroma_format,
-                unsigned width, unsigned height)
+vl_video_create(struct vl_screen *vscreen)
 {
    struct pipe_video_context *vpipe;
    struct vl_context *vctx;
 
    assert(vscreen);
-   assert(width && height);
    assert(vscreen->pscreen->video_context_create);
 
-   vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen,
-                                                  profile,
-                                                  chroma_format,
-                                                  width, height, NULL);
+   vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen, NULL);
    if (!vpipe)
       return NULL;
 
-- 
cgit v1.2.3


From 31109e1be20d7c94521879c3221a9f77bacbdb8d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 10 Apr 2011 18:46:31 +0200
Subject: [g3dvl] also use video buffer for idct intermediate

---
 src/gallium/auxiliary/vl/vl_idct.c           | 36 ++++---------------
 src/gallium/auxiliary/vl/vl_idct.h           |  4 ++-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 54 ++++++++++++++++++++--------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |  1 +
 src/gallium/auxiliary/vl/vl_video_buffer.c   | 12 ++-----
 5 files changed, 52 insertions(+), 55 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index dc4a9bbb8c9..5d5ead31f7d 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -455,33 +455,13 @@ cleanup_state(struct vl_idct *idct)
 static bool
 init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   struct pipe_resource tex_templ, *tex;
-   struct pipe_sampler_view sv_templ;
+   struct pipe_resource *tex;
    struct pipe_surface surf_templ;
    unsigned i;
 
    assert(idct && buffer);
 
-   memset(&tex_templ, 0, sizeof(tex_templ));
-   tex_templ.target = PIPE_TEXTURE_3D;
-   tex_templ.format = PIPE_FORMAT_R16G16B16A16_SNORM;
-   tex_templ.width0 = idct->buffer_width / NR_RENDER_TARGETS;
-   tex_templ.height0 = idct->buffer_height / 4;
-   tex_templ.depth0 = NR_RENDER_TARGETS;
-   tex_templ.array_size = 1;
-   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   tex_templ.usage = PIPE_USAGE_STATIC;
-
-   tex = idct->pipe->screen->resource_create(idct->pipe->screen, &tex_templ);
-   if (!tex)
-      goto error_tex;
-
-   memset(&sv_templ, 0, sizeof(sv_templ));
-   u_sampler_view_default_template(&sv_templ, tex, tex->format);
-   buffer->sampler_views.individual.intermediate =
-      idct->pipe->create_sampler_view(idct->pipe, tex, &sv_templ);
-   if (!buffer->sampler_views.individual.intermediate)
-         goto error_sampler_view;
+   tex = buffer->sampler_views.individual.intermediate->texture;
 
    buffer->fb_state[0].width = tex->width0;
    buffer->fb_state[0].height = tex->height0;
@@ -502,19 +482,12 @@ init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    buffer->viewport[0].scale[0] = tex->width0;
    buffer->viewport[0].scale[1] = tex->height0;
 
-   pipe_resource_reference(&tex, NULL);
    return true;
 
 error_surfaces:
    for(i = 0; i < NR_RENDER_TARGETS; ++i)
       pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
 
-   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
-
-error_sampler_view:
-   pipe_resource_reference(&tex, NULL);
-
-error_tex:
    return false;
 }
 
@@ -644,7 +617,9 @@ vl_idct_cleanup(struct vl_idct *idct)
 
 bool
 vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
-                    struct pipe_sampler_view *source, struct pipe_surface *destination)
+                    struct pipe_sampler_view *source,
+                    struct pipe_sampler_view *intermediate,
+                    struct pipe_surface *destination)
 {
    unsigned i;
 
@@ -656,6 +631,7 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
    pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->matrix);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
 
    if (!init_intermediate(idct, buffer))
       return false;
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 5d3784ce6c0..4ad798a855b 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -82,7 +82,9 @@ void vl_idct_cleanup(struct vl_idct *idct);
 
 /* init a buffer assosiated with agiven idct instance */
 bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
-                         struct pipe_sampler_view *source, struct pipe_surface *destination);
+                         struct pipe_sampler_view *source,
+                         struct pipe_sampler_view *intermediate,
+                         struct pipe_surface *destination);
 
 /* cleanup a buffer of an idct instance */
 void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 31163b9d08e..afb69e9c3bf 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -154,6 +154,7 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
 
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
       buf->idct_source->destroy(buf->idct_source);
+      buf->idct_intermediate->destroy(buf->idct_intermediate);
       vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
       vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
       vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
@@ -266,7 +267,7 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
    struct vl_mpeg12_buffer *buffer;
 
-   struct pipe_sampler_view **idct_views, **mc_views;
+   struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv, **mc_source_sv;
    struct pipe_surface **idct_surfaces;
 
    assert(dec);
@@ -309,39 +310,57 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
       if (!buffer->idct_source)
          goto error_idct_source;
 
+      buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                                       dec->base.width / 4, dec->base.height / 4, 4,
+                                                       dec->base.chroma_format, 3,
+                                                       idct_source_formats,
+                                                       PIPE_USAGE_STATIC);
 
-      idct_views = buffer->idct_source->get_sampler_views(buffer->idct_source);
-      if (!idct_views)
-         goto error_idct_views;
+      if (!buffer->idct_intermediate)
+         goto error_idct_intermediate;
+
+      idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source);
+      if (!idct_source_sv)
+         goto error_idct_source_sv;
+
+      idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate);
+      if (!idct_intermediate_sv)
+         goto error_idct_intermediate_sv;
 
       idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
       if (!idct_surfaces)
          goto error_idct_surfaces;
 
       if (!vl_idct_init_buffer(&dec->idct_y, &buffer->idct[0],
-                               idct_views[0], idct_surfaces[0]))
+                               idct_source_sv[0],
+                               idct_intermediate_sv[0],
+                               idct_surfaces[0]))
          goto error_idct_y;
 
       if (!vl_idct_init_buffer(&dec->idct_c, &buffer->idct[1],
-                               idct_views[1], idct_surfaces[1]))
+                               idct_source_sv[1],
+                               idct_intermediate_sv[1],
+                               idct_surfaces[1]))
          goto error_idct_cb;
 
       if (!vl_idct_init_buffer(&dec->idct_c, &buffer->idct[2],
-                               idct_views[2], idct_surfaces[2]))
+                               idct_source_sv[2],
+                               idct_intermediate_sv[2],
+                               idct_surfaces[2]))
          goto error_idct_cr;
    }
 
-   mc_views = buffer->mc_source->get_sampler_views(buffer->mc_source);
-   if (!mc_views)
-      goto error_mc_views;
+   mc_source_sv = buffer->mc_source->get_sampler_views(buffer->mc_source);
+   if (!mc_source_sv)
+      goto error_mc_source_sv;
 
-   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[0], mc_views[0]))
+   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[0], mc_source_sv[0]))
       goto error_mc_y;
 
-   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[1], mc_views[1]))
+   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[1], mc_source_sv[1]))
       goto error_mc_cb;
 
-   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[2], mc_views[2]))
+   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[2], mc_source_sv[2]))
       goto error_mc_cr;
 
    return &buffer->base;
@@ -353,7 +372,7 @@ error_mc_cb:
    vl_mpeg12_mc_cleanup_buffer(&buffer->mc[0]);
 
 error_mc_y:
-error_mc_views:
+error_mc_source_sv:
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
       vl_idct_cleanup_buffer(&dec->idct_c, &buffer->idct[2]);
 
@@ -367,7 +386,12 @@ error_idct_cb:
 
 error_idct_y:
 error_idct_surfaces:
-error_idct_views:
+error_idct_intermediate_sv:
+error_idct_source_sv:
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      buffer->idct_intermediate->destroy(buffer->idct_intermediate);
+
+error_idct_intermediate:
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
       buffer->idct_source->destroy(buffer->idct_source);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index e90f8d3880b..9be807198fe 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -64,6 +64,7 @@ struct vl_mpeg12_buffer
    struct vl_vertex_buffer vertex_stream;
 
    struct pipe_video_buffer *idct_source;
+   struct pipe_video_buffer *idct_intermediate;
    struct pipe_video_buffer *mc_source;
 
    union
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index 5ea0dfa3736..b1d8fd85dcd 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -153,7 +153,7 @@ vl_video_buffer_init(struct pipe_video_context *context,
    buffer->num_planes = num_planes;
 
    memset(&templ, 0, sizeof(templ));
-   templ.target = PIPE_TEXTURE_2D;
+   templ.target = depth > 1 ? PIPE_TEXTURE_3D : PIPE_TEXTURE_2D;
    templ.format = resource_formats[0];
    templ.width0 = width;
    templ.height0 = height;
@@ -173,16 +173,10 @@ vl_video_buffer_init(struct pipe_video_context *context,
 
    templ.format = resource_formats[1];
    if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      if (depth > 1)
-         templ.depth0 /= 2;
-      else
-         templ.width0 /= 2;
+      templ.width0 /= 2;
       templ.height0 /= 2;
    } else if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
-      if (depth > 1)
-         templ.depth0 /= 2;
-      else
-         templ.height0 /= 2;
+      templ.height0 /= 2;
    }
 
    buffer->resources[1] = pipe->screen->resource_create(pipe->screen, &templ);
-- 
cgit v1.2.3


From fcf765620d803b376582afb618b1f643242b641b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 10 Apr 2011 19:08:11 +0200
Subject: [g3dvl] make number of idct render targets configurable

---
 src/gallium/auxiliary/vl/vl_idct.c           | 22 +++++++++++-----------
 src/gallium/auxiliary/vl/vl_idct.h           |  2 ++
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 10 +++++++---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |  1 +
 4 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 5d5ead31f7d..3502f4c1eaa 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -38,8 +38,6 @@
 #include <tgsi/tgsi_ureg.h>
 #include "vl_types.h"
 
-#define NR_RENDER_TARGETS 4
-
 enum VS_OUTPUT
 {
    VS_O_VPOS,
@@ -171,7 +169,7 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
       ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
       ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
          ureg_scalar(vrect, TGSI_SWIZZLE_X),
-         ureg_imm1f(shader, BLOCK_WIDTH / NR_RENDER_TARGETS));
+         ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets));
 
       ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
 
@@ -250,7 +248,7 @@ create_matrix_frag_shader(struct vl_idct *idct)
    struct ureg_src l_addr[2], r_addr[2];
 
    struct ureg_dst l[4][2], r[2];
-   struct ureg_dst fragment[NR_RENDER_TARGETS];
+   struct ureg_dst fragment[idct->nr_of_render_targets];
 
    unsigned i, j;
 
@@ -264,7 +262,7 @@ create_matrix_frag_shader(struct vl_idct *idct)
    r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
    r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
 
-   for (i = 0; i < NR_RENDER_TARGETS; ++i)
+   for (i = 0; i < idct->nr_of_render_targets; ++i)
        fragment[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, i);
 
    for (i = 0; i < 4; ++i) {
@@ -286,7 +284,7 @@ create_matrix_frag_shader(struct vl_idct *idct)
       fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1));
    }
 
-   for (i = 0; i < NR_RENDER_TARGETS; ++i) {
+   for (i = 0; i < idct->nr_of_render_targets; ++i) {
       if(i > 0)
          increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT);
 
@@ -465,8 +463,8 @@ init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    buffer->fb_state[0].width = tex->width0;
    buffer->fb_state[0].height = tex->height0;
-   buffer->fb_state[0].nr_cbufs = NR_RENDER_TARGETS;
-   for(i = 0; i < NR_RENDER_TARGETS; ++i) {
+   buffer->fb_state[0].nr_cbufs = idct->nr_of_render_targets;
+   for(i = 0; i < idct->nr_of_render_targets; ++i) {
       memset(&surf_templ, 0, sizeof(surf_templ));
       surf_templ.format = tex->format;
       surf_templ.u.tex.first_layer = i;
@@ -485,7 +483,7 @@ init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    return true;
 
 error_surfaces:
-   for(i = 0; i < NR_RENDER_TARGETS; ++i)
+   for(i = 0; i < idct->nr_of_render_targets; ++i)
       pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
 
    return false;
@@ -498,7 +496,7 @@ cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    assert(idct && buffer);
 
-   for(i = 0; i < NR_RENDER_TARGETS; ++i)
+   for(i = 0; i < idct->nr_of_render_targets; ++i)
       pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
 
    pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
@@ -584,6 +582,7 @@ error_matrix:
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
                   unsigned blocks_x, unsigned blocks_y,
+                  unsigned nr_of_render_targets,
                   struct pipe_sampler_view *matrix)
 {
    assert(idct && pipe && matrix);
@@ -593,6 +592,7 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
    idct->buffer_height = buffer_height;
    idct->blocks_x = blocks_x;
    idct->blocks_y = blocks_y;
+   idct->nr_of_render_targets = nr_of_render_targets;
    pipe_sampler_view_reference(&idct->matrix, matrix);
 
    if(!init_shaders(idct))
@@ -666,7 +666,7 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    assert(idct && buffer);
 
-   for(i = 0; i < NR_RENDER_TARGETS; ++i)
+   for(i = 0; i < idct->nr_of_render_targets; ++i)
       pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
 
    pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 4ad798a855b..e695e32efe3 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -40,6 +40,7 @@ struct vl_idct
    unsigned buffer_width;
    unsigned buffer_height;
    unsigned blocks_x, blocks_y;
+   unsigned nr_of_render_targets;
 
    void *rs_state;
 
@@ -75,6 +76,7 @@ struct pipe_sampler_view *vl_idct_upload_matrix(struct pipe_context *pipe, float
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
                   unsigned blocks_x, unsigned blocks_y,
+                  unsigned nr_of_render_targets,
                   struct pipe_sampler_view *matrix);
 
 /* destroy an idct instance */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index afb69e9c3bf..6d0d132db20 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -311,7 +311,8 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
          goto error_idct_source;
 
       buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                                       dec->base.width / 4, dec->base.height / 4, 4,
+                                                       dec->base.width / dec->nr_of_idct_render_targets,
+                                                       dec->base.height / 4, dec->nr_of_idct_render_targets,
                                                        dec->base.chroma_format, 3,
                                                        idct_source_formats,
                                                        PIPE_USAGE_STATIC);
@@ -550,11 +551,13 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
    unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
    struct pipe_sampler_view *idct_matrix;
 
+   dec->nr_of_idct_render_targets = 4;
+
    if (!(idct_matrix = vl_idct_upload_matrix(dec->pipe, sqrt(SCALE_FACTOR_16_TO_9))))
       goto error_idct_matrix;
 
    if (!vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height,
-                     2, 2, idct_matrix))
+                     2, 2, dec->nr_of_idct_render_targets, idct_matrix))
       goto error_idct_y;
 
    if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
@@ -575,7 +578,8 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
    }
 
    if(!vl_idct_init(&dec->idct_c, dec->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y, idct_matrix))
+                    chroma_blocks_x, chroma_blocks_y,
+                    dec->nr_of_idct_render_targets, idct_matrix))
       goto error_idct_c;
 
    pipe_sampler_view_reference(&idct_matrix, NULL);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 9be807198fe..c2eb7dd17de 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -45,6 +45,7 @@ struct vl_mpeg12_decoder
    struct pipe_context *pipe;
 
    const unsigned (*empty_block_mask)[3][2][2];
+   unsigned nr_of_idct_render_targets;
 
    struct pipe_vertex_buffer quads;
    void *ves[VL_MAX_PLANES];
-- 
cgit v1.2.3


From 5ed848129cb57269e80caf2c1ca522ae41f4500b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 10 Apr 2011 19:16:38 +0200
Subject: [g3dvl] cleanup headers and comments

---
 src/gallium/auxiliary/vl/vl_compositor.c         | 10 +++++-----
 src/gallium/auxiliary/vl/vl_context.h            |  6 +++---
 src/gallium/auxiliary/vl/vl_csc.c                | 13 +++++++------
 src/gallium/auxiliary/vl/vl_idct.c               | 14 ++++++++------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h     |  6 +++---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 16 +++++++---------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  4 +---
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     |  3 ++-
 src/gallium/auxiliary/vl/vl_video_buffer.h       |  2 +-
 9 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 1278faea4c9..25f7d5fa1da 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -25,18 +25,18 @@
  *
  **************************************************************************/
 
-#include "vl_compositor.h"
-#include "util/u_draw.h"
 #include <assert.h>
+
 #include <pipe/p_context.h>
-#include <util/u_inlines.h>
+
 #include <util/u_memory.h>
-#include <util/u_keymap.h>
 #include <util/u_draw.h>
-#include <util/u_sampler.h>
+
 #include <tgsi/tgsi_ureg.h>
+
 #include "vl_csc.h"
 #include "vl_types.h"
+#include "vl_compositor.h"
 
 typedef float csc_matrix[16];
 
diff --git a/src/gallium/auxiliary/vl/vl_context.h b/src/gallium/auxiliary/vl/vl_context.h
index a4504871e65..9f12a0df925 100644
--- a/src/gallium/auxiliary/vl/vl_context.h
+++ b/src/gallium/auxiliary/vl/vl_context.h
@@ -26,8 +26,8 @@
  *
  **************************************************************************/
 
-#ifndef VL_CONTEXT_H
-#define VL_CONTEXT_H
+#ifndef vl_context_h
+#define vl_context_h
 
 #include <pipe/p_video_context.h>
 
@@ -46,4 +46,4 @@ struct vl_context
 struct pipe_video_context *
 vl_create_context(struct pipe_context *pipe, bool pot_buffers);
 
-#endif /* VL_CONTEXT_H */
+#endif /* vl_context_h */
diff --git a/src/gallium/auxiliary/vl/vl_csc.c b/src/gallium/auxiliary/vl/vl_csc.c
index 5ecc43a5fa3..75159be80df 100644
--- a/src/gallium/auxiliary/vl/vl_csc.c
+++ b/src/gallium/auxiliary/vl/vl_csc.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,13 +22,14 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
-#include "vl_csc.h"
 #include <util/u_math.h>
 #include <util/u_debug.h>
 
+#include "vl_csc.h"
+
 /*
  * Color space conversion formulas
  *
diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 3502f4c1eaa..f8e0f02c4f4 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -25,18 +25,20 @@
  *
  **************************************************************************/
 
-#include "vl_idct.h"
-#include "vl_vertex_buffers.h"
-#include "vl_defines.h"
-#include "util/u_draw.h"
 #include <assert.h>
+
 #include <pipe/p_context.h>
 #include <pipe/p_screen.h>
-#include <util/u_inlines.h>
+
+#include <util/u_draw.h>
 #include <util/u_sampler.h>
-#include <util/u_format.h>
+
 #include <tgsi/tgsi_ureg.h>
+
+#include "vl_defines.h"
 #include "vl_types.h"
+#include "vl_vertex_buffers.h"
+#include "vl_idct.h"
 
 enum VS_OUTPUT
 {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index c2eb7dd17de..8f8c84e365c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -25,8 +25,8 @@
  *
  **************************************************************************/
 
-#ifndef VL_MPEG12_DECODER_H
-#define VL_MPEG12_DECODER_H
+#ifndef vl_mpeg12_decoder_h
+#define vl_mpeg12_decoder_h
 
 #include <pipe/p_video_context.h>
 
@@ -93,4 +93,4 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height);
 
-#endif /* VL_MPEG12_DECODER_H */
+#endif /* vl_mpeg12_decoder_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 029c85137be..0f9df6b4d32 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -25,21 +25,19 @@
  *
  **************************************************************************/
 
-#include "vl_mpeg12_mc_renderer.h"
-#include "vl_vertex_buffers.h"
-#include "vl_defines.h"
-#include "util/u_draw.h"
 #include <assert.h>
+
 #include <pipe/p_context.h>
-#include <util/u_inlines.h>
-#include <util/u_format.h>
-#include <util/u_math.h>
-#include <util/u_memory.h>
-#include <util/u_keymap.h>
+
 #include <util/u_sampler.h>
 #include <util/u_draw.h>
+
 #include <tgsi/tgsi_ureg.h>
 
+#include "vl_defines.h"
+#include "vl_vertex_buffers.h"
+#include "vl_mpeg12_mc_renderer.h"
+
 enum VS_OUTPUT
 {
    VS_O_VPOS,
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index c3efda524a8..d4e49216229 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -28,14 +28,12 @@
 #ifndef vl_mpeg12_mc_renderer_h
 #define vl_mpeg12_mc_renderer_h
 
-#include <pipe/p_compiler.h>
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
+
 #include "vl_types.h"
 
 struct pipe_context;
-struct pipe_macroblock;
-struct keymap;
 
 struct vl_mpeg12_mc_renderer
 {
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 837d8bd53f9..58b841836d0 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -29,6 +29,7 @@
 
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
+
 #include "vl_types.h"
 
 /* vertex buffers act as a todo list
@@ -84,4 +85,4 @@ void vl_vb_restart(struct vl_vertex_buffer *buffer,
 
 void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
 
-#endif
+#endif /* vl_vertex_buffers_h */
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
index 1acc9f49622..3f462acc510 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.h
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -59,4 +59,4 @@ vl_video_buffer_init(struct pipe_video_context *context,
                      unsigned num_planes,
                      const enum pipe_format resource_formats[VL_MAX_PLANES],
                      unsigned usage);
-#endif
+#endif /* vl_ycbcr_buffer_h */
-- 
cgit v1.2.3


From ad4ed0e7f642a536618be183b293286fff1b206b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 10 Apr 2011 20:30:27 +0200
Subject: [g3dvl] give idct its own init buffer function

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 185 +++++++++++++++------------
 1 file changed, 102 insertions(+), 83 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 6d0d132db20..0be56f83372 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -145,20 +145,35 @@ unmap_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
    }
 }
 
+static void
+cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
+{
+   struct vl_mpeg12_decoder *dec;
+   assert(buf);
+
+   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
+   assert(dec);
+
+   buf->idct_source->destroy(buf->idct_source);
+   buf->idct_intermediate->destroy(buf->idct_intermediate);
+   vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
+   vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
+   vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
+}
+
 static void
 vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
-   assert(buf && dec);
+   struct vl_mpeg12_decoder *dec;
+   assert(buf);
+
+   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
+   assert(dec);
+
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      cleanup_idct_buffer(buf);
 
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
-      buf->idct_source->destroy(buf->idct_source);
-      buf->idct_intermediate->destroy(buf->idct_intermediate);
-      vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
-      vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
-      vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
-   }
    buf->mc_source->destroy(buf->mc_source);
    vl_vb_cleanup(&buf->vertex_stream);
    vl_mpeg12_mc_cleanup_buffer(&buf->mc[0]);
@@ -249,8 +264,8 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    FREE(dec);
 }
 
-static struct pipe_video_decode_buffer *
-vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
+static bool
+init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 {
    const enum pipe_format idct_source_formats[3] = {
       PIPE_FORMAT_R16G16B16A16_SNORM,
@@ -258,6 +273,76 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
       PIPE_FORMAT_R16G16B16A16_SNORM
    };
 
+   struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv;
+   struct pipe_surface **idct_surfaces;
+
+   struct vl_mpeg12_decoder *dec;
+
+   unsigned i;
+
+   assert(buffer);
+
+   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+
+   buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                              dec->base.width / 4, dec->base.height, 1,
+                                              dec->base.chroma_format, 3,
+                                              idct_source_formats,
+                                              PIPE_USAGE_STREAM);
+   if (!buffer->idct_source)
+      goto error_source;
+
+   buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                                    dec->base.width / dec->nr_of_idct_render_targets,
+                                                    dec->base.height / 4, dec->nr_of_idct_render_targets,
+                                                    dec->base.chroma_format, 3,
+                                                    idct_source_formats,
+                                                    PIPE_USAGE_STATIC);
+
+   if (!buffer->idct_intermediate)
+      goto error_intermediate;
+
+   idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source);
+   if (!idct_source_sv)
+      goto error_source_sv;
+
+   idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate);
+   if (!idct_intermediate_sv)
+      goto error_intermediate_sv;
+
+   idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
+   if (!idct_surfaces)
+      goto error_surfaces;
+
+   for (i = 0; i < 3; ++i)
+      if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
+                               &buffer->idct[i],
+                               idct_source_sv[i],
+                               idct_intermediate_sv[i],
+                               idct_surfaces[i]))
+         goto error_plane;
+
+   return true;
+
+error_plane:
+   for (; i > 0; --i)
+      vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
+
+error_surfaces:
+error_intermediate_sv:
+error_source_sv:
+   buffer->idct_intermediate->destroy(buffer->idct_intermediate);
+
+error_intermediate:
+   buffer->idct_source->destroy(buffer->idct_source);
+
+error_source:
+   return false;
+}
+
+static struct pipe_video_decode_buffer *
+vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
+{
    const enum pipe_format mc_source_formats[3] = {
       PIPE_FORMAT_R16_SNORM,
       PIPE_FORMAT_R16_SNORM,
@@ -267,8 +352,7 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
    struct vl_mpeg12_buffer *buffer;
 
-   struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv, **mc_source_sv;
-   struct pipe_surface **idct_surfaces;
+   struct pipe_sampler_view **mc_source_sv;
 
    assert(dec);
 
@@ -301,55 +385,9 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    if (!buffer->mc_source)
       goto error_mc_source;
 
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
-      buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                                 dec->base.width / 4, dec->base.height, 1,
-                                                 dec->base.chroma_format, 3,
-                                                 idct_source_formats,
-                                                 PIPE_USAGE_STREAM);
-      if (!buffer->idct_source)
-         goto error_idct_source;
-
-      buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                                       dec->base.width / dec->nr_of_idct_render_targets,
-                                                       dec->base.height / 4, dec->nr_of_idct_render_targets,
-                                                       dec->base.chroma_format, 3,
-                                                       idct_source_formats,
-                                                       PIPE_USAGE_STATIC);
-
-      if (!buffer->idct_intermediate)
-         goto error_idct_intermediate;
-
-      idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source);
-      if (!idct_source_sv)
-         goto error_idct_source_sv;
-
-      idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate);
-      if (!idct_intermediate_sv)
-         goto error_idct_intermediate_sv;
-
-      idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
-      if (!idct_surfaces)
-         goto error_idct_surfaces;
-
-      if (!vl_idct_init_buffer(&dec->idct_y, &buffer->idct[0],
-                               idct_source_sv[0],
-                               idct_intermediate_sv[0],
-                               idct_surfaces[0]))
-         goto error_idct_y;
-
-      if (!vl_idct_init_buffer(&dec->idct_c, &buffer->idct[1],
-                               idct_source_sv[1],
-                               idct_intermediate_sv[1],
-                               idct_surfaces[1]))
-         goto error_idct_cb;
-
-      if (!vl_idct_init_buffer(&dec->idct_c, &buffer->idct[2],
-                               idct_source_sv[2],
-                               idct_intermediate_sv[2],
-                               idct_surfaces[2]))
-         goto error_idct_cr;
-   }
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      if (!init_idct_buffer(buffer))
+         goto error_idct;
 
    mc_source_sv = buffer->mc_source->get_sampler_views(buffer->mc_source);
    if (!mc_source_sv)
@@ -375,28 +413,9 @@ error_mc_cb:
 error_mc_y:
 error_mc_source_sv:
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      vl_idct_cleanup_buffer(&dec->idct_c, &buffer->idct[2]);
-
-error_idct_cr:
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      vl_idct_cleanup_buffer(&dec->idct_c, &buffer->idct[1]);
-
-error_idct_cb:
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      vl_idct_cleanup_buffer(&dec->idct_y, &buffer->idct[0]);
+      cleanup_idct_buffer(buffer);
 
-error_idct_y:
-error_idct_surfaces:
-error_idct_intermediate_sv:
-error_idct_source_sv:
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      buffer->idct_intermediate->destroy(buffer->idct_intermediate);
-
-error_idct_intermediate:
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      buffer->idct_source->destroy(buffer->idct_source);
-
-error_idct_source:
+error_idct:
    buffer->mc_source->destroy(buffer->mc_source);
 
 error_mc_source:
-- 
cgit v1.2.3


From b6af6ba6c0e62fc91adb350d882bab69dde1ef7a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 10 Apr 2011 20:34:21 +0200
Subject: [g3dvl] remove unused rasterizer state from mpeg decoder

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 34 ----------------------------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |  1 -
 2 files changed, 35 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 0be56f83372..ce5d5099106 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -248,7 +248,6 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    dec->pipe->bind_fs_state(dec->pipe, NULL);
 
    dec->pipe->delete_blend_state(dec->pipe, dec->blend);
-   dec->pipe->delete_rasterizer_state(dec->pipe, dec->rast);
    dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa);
 
    vl_mpeg12_mc_renderer_cleanup(&dec->mc);
@@ -486,45 +485,12 @@ vl_mpeg12_decoder_clear_buffer(struct pipe_video_decode_buffer *buffer)
 static bool
 init_pipe_state(struct vl_mpeg12_decoder *dec)
 {
-   struct pipe_rasterizer_state rast;
    struct pipe_blend_state blend;
    struct pipe_depth_stencil_alpha_state dsa;
    unsigned i;
 
    assert(dec);
 
-   memset(&rast, 0, sizeof rast);
-   rast.flatshade = 1;
-   rast.flatshade_first = 0;
-   rast.light_twoside = 0;
-   rast.front_ccw = 1;
-   rast.cull_face = PIPE_FACE_NONE;
-   rast.fill_back = PIPE_POLYGON_MODE_FILL;
-   rast.fill_front = PIPE_POLYGON_MODE_FILL;
-   rast.offset_point = 0;
-   rast.offset_line = 0;
-   rast.scissor = 0;
-   rast.poly_smooth = 0;
-   rast.poly_stipple_enable = 0;
-   rast.sprite_coord_enable = 0;
-   rast.point_size_per_vertex = 0;
-   rast.multisample = 0;
-   rast.line_smooth = 0;
-   rast.line_stipple_enable = 0;
-   rast.line_stipple_factor = 0;
-   rast.line_stipple_pattern = 0;
-   rast.line_last_pixel = 0;
-   rast.line_width = 1;
-   rast.point_smooth = 0;
-   rast.point_quad_rasterization = 0;
-   rast.point_size_per_vertex = 1;
-   rast.offset_units = 1;
-   rast.offset_scale = 1;
-   rast.gl_rasterization_rules = 1;
-
-   dec->rast = dec->pipe->create_rasterizer_state(dec->pipe, &rast);
-   dec->pipe->bind_rasterizer_state(dec->pipe, dec->rast);
-
    memset(&blend, 0, sizeof blend);
 
    blend.independent_blend_enable = 0;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 8f8c84e365c..6cc895b9051 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -53,7 +53,6 @@ struct vl_mpeg12_decoder
    struct vl_idct idct_y, idct_c;
    struct vl_mpeg12_mc_renderer mc;
 
-   void *rast;
    void *dsa;
    void *blend;
 };
-- 
cgit v1.2.3


From 871d6d49c931739d94e646f2b0c3eb5db8a03d69 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 10 Apr 2011 20:49:18 +0200
Subject: [g3dvl] autoconfigure nr of idct render targets

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index ce5d5099106..b905f49fd03 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -536,7 +536,10 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
    unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
    struct pipe_sampler_view *idct_matrix;
 
-   dec->nr_of_idct_render_targets = 4;
+   dec->nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS);
+
+   // more than 4 render targets usually doesn't makes any seens
+   dec->nr_of_idct_render_targets = MIN2(dec->nr_of_idct_render_targets, 4);
 
    if (!(idct_matrix = vl_idct_upload_matrix(dec->pipe, sqrt(SCALE_FACTOR_16_TO_9))))
       goto error_idct_matrix;
-- 
cgit v1.2.3


From bad3085c7839de734f6b883088f91ae55db61a35 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 10 Apr 2011 22:32:56 +0200
Subject: [g3dvl] autoselect texture formats

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 101 +++++++++++++++++++++------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |   4 ++
 2 files changed, 82 insertions(+), 23 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index b905f49fd03..d9cf13baec7 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -43,6 +43,32 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {
         { { 0x01, 0x01 },  { 0x01, 0x01 } }
 };
 
+static const enum pipe_format const_idct_source_formats[] = {
+   PIPE_FORMAT_R16G16B16A16_SNORM
+   //PIPE_FORMAT_R16G16B16A16_SSCALED
+};
+
+static const unsigned num_idct_source_formats =
+   sizeof(const_idct_source_formats) / sizeof(enum pipe_format);
+
+static const enum pipe_format const_idct_intermediate_formats[] = {
+   PIPE_FORMAT_R16G16B16A16_FLOAT,
+   PIPE_FORMAT_R16G16B16A16_SNORM
+   //PIPE_FORMAT_R32G32B32A32_FLOAT,
+   //PIPE_FORMAT_R16G16B16A16_SSCALED
+};
+
+static const unsigned num_idct_intermediate_formats =
+   sizeof(const_idct_intermediate_formats) / sizeof(enum pipe_format);
+
+static const enum pipe_format const_mc_source_formats[] = {
+   PIPE_FORMAT_R16_SNORM
+   //PIPE_FORMAT_R16_SSCALED
+};
+
+static const unsigned num_mc_source_formats =
+   sizeof(const_mc_source_formats) / sizeof(enum pipe_format);
+
 static void
 map_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
 {
@@ -266,11 +292,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 static bool
 init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 {
-   const enum pipe_format idct_source_formats[3] = {
-      PIPE_FORMAT_R16G16B16A16_SNORM,
-      PIPE_FORMAT_R16G16B16A16_SNORM,
-      PIPE_FORMAT_R16G16B16A16_SNORM
-   };
+   enum pipe_format formats[3];
 
    struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv;
    struct pipe_surface **idct_surfaces;
@@ -283,20 +305,20 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 
    dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
 
+   formats[0] = formats[1] = formats[2] = dec->idct_source_format;
    buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                               dec->base.width / 4, dec->base.height, 1,
                                               dec->base.chroma_format, 3,
-                                              idct_source_formats,
-                                              PIPE_USAGE_STREAM);
+                                              formats, PIPE_USAGE_STREAM);
    if (!buffer->idct_source)
       goto error_source;
 
+   formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format;
    buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
                                                     dec->base.width / dec->nr_of_idct_render_targets,
                                                     dec->base.height / 4, dec->nr_of_idct_render_targets,
                                                     dec->base.chroma_format, 3,
-                                                    idct_source_formats,
-                                                    PIPE_USAGE_STATIC);
+                                                    formats, PIPE_USAGE_STATIC);
 
    if (!buffer->idct_intermediate)
       goto error_intermediate;
@@ -315,10 +337,8 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 
    for (i = 0; i < 3; ++i)
       if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
-                               &buffer->idct[i],
-                               idct_source_sv[i],
-                               idct_intermediate_sv[i],
-                               idct_surfaces[i]))
+                               &buffer->idct[i], idct_source_sv[i],
+                               idct_intermediate_sv[i], idct_surfaces[i]))
          goto error_plane;
 
    return true;
@@ -342,11 +362,7 @@ error_source:
 static struct pipe_video_decode_buffer *
 vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
 {
-   const enum pipe_format mc_source_formats[3] = {
-      PIPE_FORMAT_R16_SNORM,
-      PIPE_FORMAT_R16_SNORM,
-      PIPE_FORMAT_R16_SNORM
-   };
+   enum pipe_format formats[3];
 
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
    struct vl_mpeg12_buffer *buffer;
@@ -375,11 +391,11 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    if (!buffer->vertex_bufs.individual.stream.buffer)
       goto error_vertex_stream;
 
+   formats[0] = formats[1] = formats[2] =dec->mc_source_format;
    buffer->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                             dec->base.width, dec->base.height, 1,
                                             dec->base.chroma_format, 3,
-                                            mc_source_formats,
-                                            PIPE_USAGE_STATIC);
+                                            formats, PIPE_USAGE_STATIC);
 
    if (!buffer->mc_source)
       goto error_mc_source;
@@ -530,6 +546,27 @@ init_pipe_state(struct vl_mpeg12_decoder *dec)
    return true;
 }
 
+static enum pipe_format
+find_first_supported_format(struct vl_mpeg12_decoder *dec,
+                            const enum pipe_format formats[],
+                            unsigned num_formats,
+                            enum pipe_texture_target target)
+{
+   struct pipe_screen *screen;
+   unsigned i;
+
+   assert(dec);
+
+   screen = dec->pipe->screen;
+
+   for (i = 0; i < num_formats; ++i)
+      if (screen->is_format_supported(dec->pipe->screen, formats[i], target, 1,
+                                      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
+         return formats[i];
+
+   return PIPE_FORMAT_NONE;
+}
+
 static bool
 init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_height)
 {
@@ -541,6 +578,18 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
    // more than 4 render targets usually doesn't makes any seens
    dec->nr_of_idct_render_targets = MIN2(dec->nr_of_idct_render_targets, 4);
 
+   dec->idct_source_format = find_first_supported_format(dec, const_idct_source_formats,
+                                                         num_idct_source_formats, PIPE_TEXTURE_2D);
+
+   if (dec->idct_source_format == PIPE_FORMAT_NONE)
+      return false;
+
+   dec->idct_intermediate_format = find_first_supported_format(dec, const_idct_intermediate_formats,
+                                                               num_idct_intermediate_formats, PIPE_TEXTURE_3D);
+
+   if (dec->idct_intermediate_format == PIPE_FORMAT_NONE)
+      return false;
+
    if (!(idct_matrix = vl_idct_upload_matrix(dec->pipe, sqrt(SCALE_FACTOR_16_TO_9))))
       goto error_idct_matrix;
 
@@ -626,13 +675,19 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
    dec->empty_block_mask = &const_empty_block_mask_420;
 
+   if (!vl_mpeg12_mc_renderer_init(&dec->mc, dec->pipe, dec->base.width, dec->base.height,
+                                   entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 1.0f : SCALE_FACTOR_16_TO_9))
+      goto error_mc;
+
    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
       if (!init_idct(dec, dec->base.width, dec->base.height))
          goto error_idct;
 
-   if (!vl_mpeg12_mc_renderer_init(&dec->mc, dec->pipe, dec->base.width, dec->base.height,
-                                   entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 1.0f : SCALE_FACTOR_16_TO_9))
-      goto error_mc;
+   dec->mc_source_format = find_first_supported_format(dec, const_mc_source_formats,
+                                                       num_mc_source_formats, PIPE_TEXTURE_3D);
+
+   if (dec->mc_source_format == PIPE_FORMAT_NONE)
+      return false;
 
    if (!init_pipe_state(dec))
       goto error_pipe_state;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 6cc895b9051..25048e8543c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -47,6 +47,10 @@ struct vl_mpeg12_decoder
    const unsigned (*empty_block_mask)[3][2][2];
    unsigned nr_of_idct_render_targets;
 
+   enum pipe_format idct_source_format;
+   enum pipe_format idct_intermediate_format;
+   enum pipe_format mc_source_format;
+
    struct pipe_vertex_buffer quads;
    void *ves[VL_MAX_PLANES];
 
-- 
cgit v1.2.3


From 36b322dffd2429130f132f55f68acb1a23ba1658 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 11 Apr 2011 00:49:28 +0200
Subject: r600g: support textures with scaled number formats

---
 src/gallium/drivers/r600/eg_state_inlines.h   | 1 +
 src/gallium/drivers/r600/r600_state_inlines.h | 1 +
 src/gallium/drivers/r600/r600_texture.c       | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index cae3888051b..487adddcb72 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -373,6 +373,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
 		/* 64-bit buffers. */
 	case PIPE_FORMAT_R16G16B16A16_UNORM:
 	case PIPE_FORMAT_R16G16B16A16_SNORM:
+	case PIPE_FORMAT_R16G16B16A16_SSCALED:
 	case PIPE_FORMAT_R16G16B16A16_FLOAT:
 
 		/* 128-bit buffers. */
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 9e6ae295239..2a40f41bbf5 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -368,6 +368,7 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
 		/* 64-bit buffers. */
 	case PIPE_FORMAT_R16G16B16A16_UNORM:
 	case PIPE_FORMAT_R16G16B16A16_SNORM:
+	case PIPE_FORMAT_R16G16B16A16_SSCALED:
 	case PIPE_FORMAT_R16G16B16A16_FLOAT:
 
 		/* 128-bit buffers. */
diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index d079b571156..690aeafcc52 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -996,7 +996,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
 	case UTIL_FORMAT_TYPE_SIGNED:
 		if (!desc->channel[i].normalized &&
 		    desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
-			goto out_unknown;
+			word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_SCALED);
 		}
 
 		switch (desc->channel[i].size) {
-- 
cgit v1.2.3


From ccc80d2c09ad35f867c0c0a85f7e1cadd73941bb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 11 Apr 2011 23:55:36 +0200
Subject: [g3dvl] fully support different formats for source and intermediate
 textures

---
 src/gallium/auxiliary/vl/vl_idct.c           |   7 +-
 src/gallium/auxiliary/vl/vl_idct.h           |   4 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 118 +++++++++++++++++++--------
 3 files changed, 94 insertions(+), 35 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index f8e0f02c4f4..a20263bc182 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -585,7 +585,8 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
                   unsigned blocks_x, unsigned blocks_y,
                   unsigned nr_of_render_targets,
-                  struct pipe_sampler_view *matrix)
+                  struct pipe_sampler_view *matrix,
+                  struct pipe_sampler_view *transpose)
 {
    assert(idct && pipe && matrix);
 
@@ -595,7 +596,9 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
    idct->blocks_x = blocks_x;
    idct->blocks_y = blocks_y;
    idct->nr_of_render_targets = nr_of_render_targets;
+
    pipe_sampler_view_reference(&idct->matrix, matrix);
+   pipe_sampler_view_reference(&idct->transpose, transpose);
 
    if(!init_shaders(idct))
       return false;
@@ -632,7 +635,7 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
 
    pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
-   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->matrix);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
 
    if (!init_intermediate(idct, buffer))
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index e695e32efe3..0875f17476c 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -50,6 +50,7 @@ struct vl_idct
    void *matrix_fs, *transpose_fs;
 
    struct pipe_sampler_view *matrix;
+   struct pipe_sampler_view *transpose;
 };
 
 /* a set of buffers to work with */
@@ -77,7 +78,8 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
                   unsigned blocks_x, unsigned blocks_y,
                   unsigned nr_of_render_targets,
-                  struct pipe_sampler_view *matrix);
+                  struct pipe_sampler_view *matrix,
+                  struct pipe_sampler_view *transpose);
 
 /* destroy an idct instance */
 void vl_idct_cleanup(struct vl_idct *idct);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index d9cf13baec7..40a1b74f654 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -35,17 +35,18 @@
 #include "vl_mpeg12_decoder.h"
 #include "vl_defines.h"
 
-#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
+#define SCALE_FACTOR_SNORM (32768.0f / 256.0f)
+#define SCALE_FACTOR_SSCALED (1.0f / 256.0f)
 
 static const unsigned const_empty_block_mask_420[3][2][2] = {
-        { { 0x20, 0x10 },  { 0x08, 0x04 } },
-        { { 0x02, 0x02 },  { 0x02, 0x02 } },
-        { { 0x01, 0x01 },  { 0x01, 0x01 } }
+   { { 0x20, 0x10 },  { 0x08, 0x04 } },
+   { { 0x02, 0x02 },  { 0x02, 0x02 } },
+   { { 0x01, 0x01 },  { 0x01, 0x01 } }
 };
 
 static const enum pipe_format const_idct_source_formats[] = {
-   PIPE_FORMAT_R16G16B16A16_SNORM
-   //PIPE_FORMAT_R16G16B16A16_SSCALED
+   PIPE_FORMAT_R16G16B16A16_SNORM,
+   PIPE_FORMAT_R16G16B16A16_SSCALED
 };
 
 static const unsigned num_idct_source_formats =
@@ -53,17 +54,17 @@ static const unsigned num_idct_source_formats =
 
 static const enum pipe_format const_idct_intermediate_formats[] = {
    PIPE_FORMAT_R16G16B16A16_FLOAT,
-   PIPE_FORMAT_R16G16B16A16_SNORM
-   //PIPE_FORMAT_R32G32B32A32_FLOAT,
-   //PIPE_FORMAT_R16G16B16A16_SSCALED
+   PIPE_FORMAT_R16G16B16A16_SNORM,
+   PIPE_FORMAT_R16G16B16A16_SSCALED,
+   PIPE_FORMAT_R32G32B32A32_FLOAT
 };
 
 static const unsigned num_idct_intermediate_formats =
    sizeof(const_idct_intermediate_formats) / sizeof(enum pipe_format);
 
 static const enum pipe_format const_mc_source_formats[] = {
-   PIPE_FORMAT_R16_SNORM
-   //PIPE_FORMAT_R16_SSCALED
+   PIPE_FORMAT_R16_SNORM,
+   PIPE_FORMAT_R16_SSCALED
 };
 
 static const unsigned num_mc_source_formats =
@@ -571,7 +572,8 @@ static bool
 init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_height)
 {
    unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
-   struct pipe_sampler_view *idct_matrix;
+   struct pipe_sampler_view *matrix, *transpose;
+   float matrix_scale, transpose_scale;
 
    dec->nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS);
 
@@ -590,12 +592,41 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
    if (dec->idct_intermediate_format == PIPE_FORMAT_NONE)
       return false;
 
-   if (!(idct_matrix = vl_idct_upload_matrix(dec->pipe, sqrt(SCALE_FACTOR_16_TO_9))))
-      goto error_idct_matrix;
+   switch (dec->idct_source_format) {
+   case PIPE_FORMAT_R16G16B16A16_SSCALED:
+      matrix_scale = SCALE_FACTOR_SSCALED;
+      break;
+
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      matrix_scale = SCALE_FACTOR_SNORM;
+      break;
+
+   default:
+      assert(0);
+      return false;
+   }
+
+   if (dec->idct_intermediate_format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
+       dec->idct_intermediate_format == PIPE_FORMAT_R32G32B32A32_FLOAT)
+      transpose_scale = 1.0f;
+   else
+      transpose_scale = matrix_scale = sqrt(matrix_scale);
+
+   if (dec->mc_source_format == PIPE_FORMAT_R16_SSCALED)
+      transpose_scale /= SCALE_FACTOR_SSCALED;
+
+   if (!(matrix = vl_idct_upload_matrix(dec->pipe, matrix_scale)))
+      goto error_matrix;
+
+   if (matrix_scale != transpose_scale) {
+      if (!(transpose = vl_idct_upload_matrix(dec->pipe, transpose_scale)))
+         goto error_transpose;
+   } else
+      pipe_sampler_view_reference(&transpose, matrix);
 
    if (!vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height,
-                     2, 2, dec->nr_of_idct_render_targets, idct_matrix))
-      goto error_idct_y;
+                     2, 2, dec->nr_of_idct_render_targets, matrix, transpose))
+      goto error_y;
 
    if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
       chroma_width = buffer_width / 2;
@@ -616,19 +647,23 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
 
    if(!vl_idct_init(&dec->idct_c, dec->pipe, chroma_width, chroma_height,
                     chroma_blocks_x, chroma_blocks_y,
-                    dec->nr_of_idct_render_targets, idct_matrix))
-      goto error_idct_c;
+                    dec->nr_of_idct_render_targets, matrix, transpose))
+      goto error_c;
 
-   pipe_sampler_view_reference(&idct_matrix, NULL);
+   pipe_sampler_view_reference(&matrix, NULL);
+   pipe_sampler_view_reference(&transpose, NULL);
    return true;
 
-error_idct_c:
+error_c:
    vl_idct_cleanup(&dec->idct_y);
 
-error_idct_y:
-   pipe_sampler_view_reference(&idct_matrix, NULL);
+error_y:
+   pipe_sampler_view_reference(&transpose, NULL);
+
+error_transpose:
+   pipe_sampler_view_reference(&matrix, NULL);
 
-error_idct_matrix:
+error_matrix:
    return false;
 }
 
@@ -641,6 +676,7 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
                          unsigned width, unsigned height)
 {
    struct vl_mpeg12_decoder *dec;
+   float mc_scale;
    unsigned i;
 
    assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
@@ -675,19 +711,37 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
    dec->empty_block_mask = &const_empty_block_mask_420;
 
-   if (!vl_mpeg12_mc_renderer_init(&dec->mc, dec->pipe, dec->base.width, dec->base.height,
-                                   entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 1.0f : SCALE_FACTOR_16_TO_9))
-      goto error_mc;
-
-   if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      if (!init_idct(dec, dec->base.width, dec->base.height))
-         goto error_idct;
-
    dec->mc_source_format = find_first_supported_format(dec, const_mc_source_formats,
                                                        num_mc_source_formats, PIPE_TEXTURE_3D);
 
    if (dec->mc_source_format == PIPE_FORMAT_NONE)
-      return false;
+      return NULL;
+
+   if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      if (!init_idct(dec, dec->base.width, dec->base.height))
+         goto error_idct;
+      if (dec->mc_source_format == PIPE_FORMAT_R16_SSCALED)
+         mc_scale = SCALE_FACTOR_SSCALED;
+      else
+         mc_scale = 1.0f;
+   } else {
+      switch (dec->mc_source_format) {
+      case PIPE_FORMAT_R16_SNORM:
+         mc_scale = SCALE_FACTOR_SNORM;
+         break;
+
+      case PIPE_FORMAT_R16_SSCALED:
+         mc_scale = SCALE_FACTOR_SSCALED;
+         break;
+
+      default:
+         assert(0);
+         return NULL;
+      }
+   }
+
+   if (!vl_mpeg12_mc_renderer_init(&dec->mc, dec->pipe, dec->base.width, dec->base.height, mc_scale))
+      goto error_mc;
 
    if (!init_pipe_state(dec))
       goto error_pipe_state;
-- 
cgit v1.2.3


From 4f3fb1586aebfe248321e935651b5af92b5a8261 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 12 Apr 2011 19:21:07 +0200
Subject: [g3dvl] make resource format selection a public interface

---
 src/gallium/auxiliary/vl/vl_context.c          |  9 ++-------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |  6 +++---
 src/gallium/auxiliary/vl/vl_video_buffer.c     | 14 ++++++++------
 src/gallium/auxiliary/vl/vl_video_buffer.h     |  1 -
 src/gallium/include/pipe/p_video_context.h     |  1 +
 src/gallium/state_trackers/vdpau/surface.c     |  7 +++++++
 src/gallium/state_trackers/xorg/xvmc/surface.c |  7 +++++++
 7 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index 2bc027fedc5..ba732548732 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -199,15 +199,10 @@ vl_context_create_decoder(struct pipe_video_context *context,
 static struct pipe_video_buffer *
 vl_context_create_buffer(struct pipe_video_context *context,
                          enum pipe_format buffer_format,
+                         enum pipe_format resource_formats[3],
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height)
 {
-   const enum pipe_format resource_formats[3] = {
-      PIPE_FORMAT_R8_SNORM,
-      PIPE_FORMAT_R8_SNORM,
-      PIPE_FORMAT_R8_SNORM
-   };
-
    struct vl_context *ctx = (struct vl_context*)context;
    struct pipe_video_buffer *result;
    unsigned buffer_width, buffer_height;
@@ -221,7 +216,7 @@ vl_context_create_buffer(struct pipe_video_context *context,
 
    result = vl_video_buffer_init(context, ctx->pipe,
                                  buffer_width, buffer_height, 1,
-                                 chroma_format, 3,
+                                 chroma_format,
                                  resource_formats,
                                  PIPE_USAGE_STATIC);
    if (result) // TODO move format handling into vl_video_buffer
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 40a1b74f654..ce0393848e3 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -309,7 +309,7 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
    formats[0] = formats[1] = formats[2] = dec->idct_source_format;
    buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                               dec->base.width / 4, dec->base.height, 1,
-                                              dec->base.chroma_format, 3,
+                                              dec->base.chroma_format,
                                               formats, PIPE_USAGE_STREAM);
    if (!buffer->idct_source)
       goto error_source;
@@ -318,7 +318,7 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
    buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
                                                     dec->base.width / dec->nr_of_idct_render_targets,
                                                     dec->base.height / 4, dec->nr_of_idct_render_targets,
-                                                    dec->base.chroma_format, 3,
+                                                    dec->base.chroma_format,
                                                     formats, PIPE_USAGE_STATIC);
 
    if (!buffer->idct_intermediate)
@@ -395,7 +395,7 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    formats[0] = formats[1] = formats[2] =dec->mc_source_format;
    buffer->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                             dec->base.width, dec->base.height, 1,
-                                            dec->base.chroma_format, 3,
+                                            dec->base.chroma_format,
                                             formats, PIPE_USAGE_STATIC);
 
    if (!buffer->mc_source)
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index b1d8fd85dcd..dad8dd2c9ae 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -133,7 +133,6 @@ vl_video_buffer_init(struct pipe_video_context *context,
                      struct pipe_context *pipe,
                      unsigned width, unsigned height, unsigned depth,
                      enum pipe_video_chroma_format chroma_format,
-                     unsigned num_planes,
                      const enum pipe_format resource_formats[VL_MAX_PLANES],
                      unsigned usage)
 {
@@ -142,7 +141,6 @@ vl_video_buffer_init(struct pipe_video_context *context,
    unsigned i;
 
    assert(context && pipe);
-   assert(num_planes > 0 && num_planes <= VL_MAX_PLANES);
 
    buffer = CALLOC_STRUCT(vl_video_buffer);
 
@@ -150,7 +148,7 @@ vl_video_buffer_init(struct pipe_video_context *context,
    buffer->base.get_sampler_views = vl_video_buffer_sampler_views;
    buffer->base.get_surfaces = vl_video_buffer_surfaces;
    buffer->pipe = pipe;
-   buffer->num_planes = num_planes;
+   buffer->num_planes = 1;
 
    memset(&templ, 0, sizeof(templ));
    templ.target = depth > 1 ? PIPE_TEXTURE_3D : PIPE_TEXTURE_2D;
@@ -166,10 +164,12 @@ vl_video_buffer_init(struct pipe_video_context *context,
    if (!buffer->resources[0])
       goto error;
 
-   if (num_planes == 1) {
+   if (resource_formats[1] == PIPE_FORMAT_NONE) {
       assert(chroma_format == PIPE_VIDEO_CHROMA_FORMAT_444);
+      assert(resource_formats[2] == PIPE_FORMAT_NONE);
       return &buffer->base;
-   }
+   } else
+      buffer->num_planes = 2;
 
    templ.format = resource_formats[1];
    if (chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
@@ -183,8 +183,10 @@ vl_video_buffer_init(struct pipe_video_context *context,
    if (!buffer->resources[1])
       goto error;
 
-   if (num_planes == 2)
+   if (resource_formats[2] == PIPE_FORMAT_NONE)
       return &buffer->base;
+   else
+      buffer->num_planes = 3;
 
    templ.format = resource_formats[2];
    buffer->resources[2] = pipe->screen->resource_create(pipe->screen, &templ);
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
index 3f462acc510..f5c424cf296 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.h
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -56,7 +56,6 @@ vl_video_buffer_init(struct pipe_video_context *context,
                      struct pipe_context *pipe,
                      unsigned width, unsigned height, unsigned depth,
                      enum pipe_video_chroma_format chroma_format,
-                     unsigned num_planes,
                      const enum pipe_format resource_formats[VL_MAX_PLANES],
                      unsigned usage);
 #endif /* vl_ycbcr_buffer_h */
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 21d0581226d..22203b66d6f 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -121,6 +121,7 @@ struct pipe_video_context
     */
    struct pipe_video_buffer *(*create_buffer)(struct pipe_video_context *context,
                                               enum pipe_format buffer_format,
+                                              enum pipe_format resource_formats[3],
                                               enum pipe_video_chroma_format chroma_format,
                                               unsigned width, unsigned height);
 
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index f0aafae79b5..2b1eb047141 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -41,6 +41,12 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
                         uint32_t width, uint32_t height,
                         VdpVideoSurface *surface)
 {
+   const enum pipe_format resource_formats[3] = {
+      PIPE_FORMAT_R8_UNORM,
+      PIPE_FORMAT_R8_UNORM,
+      PIPE_FORMAT_R8_UNORM
+   };
+
    vlVdpSurface *p_surf;
    VdpStatus ret;
 
@@ -71,6 +77,7 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
    p_surf->device = dev;
    p_surf->video_buffer = dev->context->vpipe->create_buffer(dev->context->vpipe,
                                                              PIPE_FORMAT_YV12, // most common used
+                                                             resource_formats,
                                                              ChromaToPipe(chroma_type),
                                                              width, height);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 68d003e470a..10701856223 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -197,6 +197,12 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
 PUBLIC
 Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surface)
 {
+   const enum pipe_format resource_formats[3] = {
+      PIPE_FORMAT_R8_SNORM,
+      PIPE_FORMAT_R8_SNORM,
+      PIPE_FORMAT_R8_SNORM
+   };
+
    XvMCContextPrivate *context_priv;
    struct pipe_video_context *vpipe;
    XvMCSurfacePrivate *surface_priv;
@@ -219,6 +225,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
 
    surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder);
    surface_priv->video_buffer = vpipe->create_buffer(vpipe, PIPE_FORMAT_YV12, //TODO
+                                                     resource_formats,
                                                      context_priv->decoder->chroma_format,
                                                      context_priv->decoder->width,
                                                      context_priv->decoder->height);
-- 
cgit v1.2.3


From 62373e8f9e948ac441d9fe355edfc0dca5f9df9c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 12 Apr 2011 19:42:47 +0200
Subject: vdpau: set at least a basic csc matrix

---
 src/gallium/state_trackers/vdpau/mixer.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index 2fe0f1ca6f4..83cbf8abdb1 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -26,8 +26,12 @@
  **************************************************************************/
 
 #include <vdpau/vdpau.h>
+
 #include <util/u_memory.h>
 #include <util/u_debug.h>
+
+#include <vl/vl_csc.h>
+
 #include "vdpau_private.h"
 
 VdpStatus
@@ -42,6 +46,7 @@ vlVdpVideoMixerCreate(VdpDevice device,
    vlVdpVideoMixer *vmixer = NULL;
    struct pipe_video_context *context;
    VdpStatus ret;
+   float csc[16];
 
    debug_printf("[VDPAU] Creating VideoMixer\n");
 
@@ -58,6 +63,14 @@ vlVdpVideoMixerCreate(VdpDevice device,
    vmixer->device = dev;
    vmixer->compositor = context->create_compositor(context);
 
+   vl_csc_get_matrix
+   (
+      debug_get_bool_option("G3DVL_NO_CSC", FALSE) ?
+      VL_CSC_COLOR_STANDARD_IDENTITY : VL_CSC_COLOR_STANDARD_BT_601,
+      NULL, true, csc
+   );
+   vmixer->compositor->set_csc_matrix(vmixer->compositor, csc);
+
    /*
     * TODO: Handle features and parameters
     * */
-- 
cgit v1.2.3


From f63aba41fb7eba142b0b22a2a46cb264157c2505 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 12 Apr 2011 20:19:59 +0200
Subject: vdpau: switch cb cr planes in vlVdpVideoSurfacePutBitsYCbCr

---
 src/gallium/state_trackers/vdpau/surface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index 2b1eb047141..57f5563b12f 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -188,7 +188,7 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
       return VDP_STATUS_RESOURCES;
 
    for (i = 0; i < 3; ++i) { //TODO put nr of planes into util format
-      struct pipe_sampler_view *sv = sampler_views[i];
+      struct pipe_sampler_view *sv = sampler_views[i ? i ^ 3 : 0];
       struct pipe_box dst_box = { 0, 0, 0, sv->texture->width0, sv->texture->height0, 1 };
       context->upload_sampler(context, sv, &dst_box, source_data[i], source_pitches[i], 0, 0);
    }
-- 
cgit v1.2.3


From b48676672592271597d07e5ece79cf4d3ffbe04b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 12 Apr 2011 20:38:21 +0200
Subject: xvmc: cleanup headers

---
 src/gallium/state_trackers/xorg/xvmc/attributes.c   | 12 +++++++-----
 src/gallium/state_trackers/xorg/xvmc/block.c        | 13 ++++++++-----
 src/gallium/state_trackers/xorg/xvmc/context.c      |  7 ++++++-
 src/gallium/state_trackers/xorg/xvmc/subpicture.c   |  7 ++++++-
 src/gallium/state_trackers/xorg/xvmc/surface.c      |  7 ++++++-
 src/gallium/state_trackers/xorg/xvmc/xvmc_private.h | 17 ++++++++++++-----
 6 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/attributes.c b/src/gallium/state_trackers/xorg/xvmc/attributes.c
index d23d8635b66..c1cea655241 100644
--- a/src/gallium/state_trackers/xorg/xvmc/attributes.c
+++ b/src/gallium/state_trackers/xorg/xvmc/attributes.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,13 +22,15 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include <assert.h>
+
 #include <X11/Xlib.h>
 #include <X11/extensions/Xvlib.h>
 #include <X11/extensions/XvMClib.h>
+
 #include <pipe/p_compiler.h>
 
 PUBLIC
diff --git a/src/gallium/state_trackers/xorg/xvmc/block.c b/src/gallium/state_trackers/xorg/xvmc/block.c
index c7da7a84a7b..6b0b21273f5 100644
--- a/src/gallium/state_trackers/xorg/xvmc/block.c
+++ b/src/gallium/state_trackers/xorg/xvmc/block.c
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,13 +22,16 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #include <assert.h>
+
 #include <X11/Xlib.h>
 #include <X11/extensions/XvMClib.h>
+
 #include <util/u_memory.h>
+
 #include "xvmc_private.h"
 
 PUBLIC
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index 6f136f2b121..b0338336ae8 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -26,15 +26,20 @@
  **************************************************************************/
 
 #include <assert.h>
+
 #include <X11/Xlibint.h>
 #include <X11/extensions/XvMClib.h>
+
 #include <pipe/p_screen.h>
 #include <pipe/p_video_context.h>
 #include <pipe/p_video_state.h>
 #include <pipe/p_state.h>
-#include <vl_winsys.h>
+
 #include <util/u_memory.h>
+
 #include <vl/vl_csc.h>
+#include <vl_winsys.h>
+
 #include "xvmc_private.h"
 
 static Status Validate(Display *dpy, XvPortID port, int surface_type_id,
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index da9e87f50dd..1b884053eae 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -26,17 +26,22 @@
  **************************************************************************/
 
 #include <assert.h>
+
 #include <X11/Xlibint.h>
 #include <X11/extensions/XvMClib.h>
 #include <xorg/fourcc.h>
-#include <vl_winsys.h>
+
 #include <pipe/p_screen.h>
 #include <pipe/p_video_context.h>
 #include <pipe/p_state.h>
+
 #include <util/u_memory.h>
 #include <util/u_math.h>
 #include <util/u_format.h>
 #include <util/u_sampler.h>
+
+#include <vl_winsys.h>
+
 #include "xvmc_private.h"
 
 #define FOURCC_RGB 0x0000003
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 10701856223..217f985bc40 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -27,14 +27,19 @@
 
 #include <assert.h>
 #include <stdio.h>
+
 #include <X11/Xlibint.h>
-#include <vl_winsys.h>
+
 #include <pipe/p_video_context.h>
 #include <pipe/p_video_state.h>
 #include <pipe/p_state.h>
+
 #include <util/u_inlines.h>
 #include <util/u_memory.h>
 #include <util/u_math.h>
+
+#include <vl_winsys.h>
+
 #include "xvmc_private.h"
 
 static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 9a5338c2923..b0239f4c46d 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -30,12 +30,20 @@
 
 #include <X11/Xlib.h>
 #include <X11/extensions/XvMClib.h>
+
 #include <util/u_debug.h>
+#include <util/u_math.h>
 
 #define BLOCK_SIZE_SAMPLES 64
 #define BLOCK_SIZE_BYTES (BLOCK_SIZE_SAMPLES * 2)
 
 struct vl_context;
+
+struct pipe_video_decoder;
+struct pipe_video_compositor;
+struct pipe_video_decode_buffer;
+struct pipe_video_buffer;
+
 struct pipe_sampler_view;
 struct pipe_fence_handle;
 
@@ -93,14 +101,13 @@ typedef struct
 #define XVMC_ERR   1
 #define XVMC_WARN  2
 #define XVMC_TRACE 3
+
 static INLINE void XVMC_MSG(unsigned int level, const char *fmt, ...)
 {
-   static boolean check_dbg_level = TRUE;
-   static unsigned int debug_level;
+   static int debug_level = -1;
 
-   if (check_dbg_level) {
-      debug_level = debug_get_num_option("XVMC_DEBUG", 0);
-      check_dbg_level = FALSE;
+   if (debug_level == -1) {
+      debug_level = MIN2(debug_get_num_option("XVMC_DEBUG", 0), 0);
    }
 
    if (level <= debug_level) {
-- 
cgit v1.2.3


From 3745025b28aa6142e7465f2af1387aebc99b5a70 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 12 Apr 2011 20:45:31 +0200
Subject: [g3dvl] make resource_format param const

---
 src/gallium/auxiliary/vl/vl_context.c      | 2 +-
 src/gallium/include/pipe/p_video_context.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index ba732548732..49a9b50863c 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -199,7 +199,7 @@ vl_context_create_decoder(struct pipe_video_context *context,
 static struct pipe_video_buffer *
 vl_context_create_buffer(struct pipe_video_context *context,
                          enum pipe_format buffer_format,
-                         enum pipe_format resource_formats[3],
+                         const enum pipe_format resource_formats[3],
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height)
 {
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 22203b66d6f..81fc2812249 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -121,7 +121,7 @@ struct pipe_video_context
     */
    struct pipe_video_buffer *(*create_buffer)(struct pipe_video_context *context,
                                               enum pipe_format buffer_format,
-                                              enum pipe_format resource_formats[3],
+                                              const enum pipe_format resource_formats[3],
                                               enum pipe_video_chroma_format chroma_format,
                                               unsigned width, unsigned height);
 
-- 
cgit v1.2.3


From 4d057864d0d523c241e40ad675487276789e3b36 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 12 Apr 2011 21:42:08 +0200
Subject: xvmc: flush surface if macroblock (0,0) is detected

---
 src/gallium/state_trackers/xorg/xvmc/surface.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 217f985bc40..59840a1394e 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -85,22 +85,18 @@ static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, unsigned
 static void
 MacroBlocksToPipe(struct pipe_screen *screen,
                   unsigned int xvmc_picture_structure,
-                  const XvMCMacroBlockArray *xvmc_macroblocks,
+                  const XvMCMacroBlock *xvmc_mb,
                   const XvMCBlockArray *xvmc_blocks,
-                  unsigned int first_macroblock,
                   unsigned int num_macroblocks,
                   struct pipe_mpeg12_macroblock *mb)
 {
    unsigned int i, j;
-   XvMCMacroBlock *xvmc_mb;
 
-   assert(xvmc_macroblocks);
+   assert(xvmc_mb);
    assert(xvmc_blocks);
    assert(mb);
    assert(num_macroblocks);
 
-   xvmc_mb = xvmc_macroblocks->macro_blocks + first_macroblock;
-
    for (i = 0; i < num_macroblocks; ++i) {
       mb->base.codec = PIPE_VIDEO_CODEC_MPEG12;
       mb->mbx = xvmc_mb->x;
@@ -259,10 +255,13 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
 {
    struct pipe_video_context *vpipe;
    struct pipe_video_decode_buffer *t_buffer;
+
    XvMCContextPrivate *context_priv;
    XvMCSurfacePrivate *target_surface_priv;
    XvMCSurfacePrivate *past_surface_priv;
    XvMCSurfacePrivate *future_surface_priv;
+   XvMCMacroBlock *xvmc_mb;
+
    struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p, with past %p and future %p\n",
@@ -316,17 +315,19 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    if (future_surface)
       unmap_and_flush_surface(future_surface->privData);
 
+   xvmc_mb = macroblocks->macro_blocks + first_macroblock;
+
    /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
    if (target_surface_priv->mapped && (
        target_surface_priv->ref_surfaces[0] != past_surface ||
-       target_surface_priv->ref_surfaces[1] != future_surface)) {
+       target_surface_priv->ref_surfaces[1] != future_surface ||
+       (xvmc_mb->x == 0 && xvmc_mb->y == 0))) {
 
-      // If they change anyway we need to flush our surface
+      // If they change anyway we need to clear our surface
       unmap_and_flush_surface(target_surface_priv);
    }
 
-   MacroBlocksToPipe(vpipe->screen, picture_structure, macroblocks, blocks, first_macroblock,
-                     num_macroblocks, pipe_macroblocks);
+   MacroBlocksToPipe(vpipe->screen, picture_structure, xvmc_mb, blocks, num_macroblocks, pipe_macroblocks);
 
    if (!target_surface_priv->mapped) {
       t_buffer->map(t_buffer);
-- 
cgit v1.2.3


From 87e81a3e9db1a30f6f31e6e91aeb5acfdc8b589f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 12 Apr 2011 21:51:41 +0200
Subject: xvmc: fix compiler warning

---
 src/gallium/state_trackers/xorg/xvmc/surface.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 59840a1394e..c8c8638e581 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -467,8 +467,10 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    if(dump_window) {
       static unsigned int framenum = 0;
       char cmd[256];
+
       sprintf(cmd, "xwd -id %d -out xvmc_frame_%08d.xwd", (int)drawable, ++framenum);
-      system(cmd);
+      if (system(cmd) != 0)
+         XVMC_MSG(XVMC_ERR, "[XvMC] Dumping surface %p failed.\n", surface);
    }
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Pushed surface %p to front buffer.\n", surface);
-- 
cgit v1.2.3


From 537370be4b8aa3ecac8c5b0905f3cfc08e71da0d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 12 Apr 2011 22:07:03 +0200
Subject: xvmc: move subpicture swizzle into own function

---
 src/gallium/state_trackers/xorg/xvmc/subpicture.c | 57 ++++++++++++-----------
 1 file changed, 31 insertions(+), 26 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 1b884053eae..31b1a96512f 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -78,6 +78,36 @@ static unsigned NumPaletteEntries4XvID(int xvimage_id)
    }
 }
 
+static void XvIDToSwizzle(int xvimage_id, struct pipe_sampler_view *tmpl)
+{
+   switch (xvimage_id) {
+      default:
+         XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", xvimage_id);
+
+      /* fall through */
+      case FOURCC_RGB:
+         tmpl->swizzle_r = PIPE_SWIZZLE_BLUE;
+         tmpl->swizzle_g = PIPE_SWIZZLE_GREEN;
+         tmpl->swizzle_b = PIPE_SWIZZLE_RED;
+         tmpl->swizzle_a = PIPE_SWIZZLE_ONE;
+         break;
+
+      case FOURCC_AI44:
+         tmpl->swizzle_r = PIPE_SWIZZLE_ALPHA;
+         tmpl->swizzle_g = PIPE_SWIZZLE_ZERO;
+         tmpl->swizzle_b = PIPE_SWIZZLE_ZERO;
+         tmpl->swizzle_a = PIPE_SWIZZLE_RED;
+         break;
+
+      case FOURCC_IA44:
+         tmpl->swizzle_r = PIPE_SWIZZLE_RED;
+         tmpl->swizzle_g = PIPE_SWIZZLE_ZERO;
+         tmpl->swizzle_b = PIPE_SWIZZLE_ZERO;
+         tmpl->swizzle_a = PIPE_SWIZZLE_ALPHA;
+         break;
+   }
+}
+
 static int PipeToComponentOrder(enum pipe_format format, char *component_order)
 {
    assert(component_order);
@@ -220,32 +250,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
 
    memset(&sampler_templ, 0, sizeof(sampler_templ));
    u_sampler_view_default_template(&sampler_templ, tex, tex->format);
-
-   switch (xvimage_id) {
-      case FOURCC_RGB:
-         sampler_templ.swizzle_r = PIPE_SWIZZLE_BLUE;
-         sampler_templ.swizzle_g = PIPE_SWIZZLE_GREEN;
-         sampler_templ.swizzle_b = PIPE_SWIZZLE_RED;
-         sampler_templ.swizzle_a = PIPE_SWIZZLE_ONE;
-         break;
-
-      case FOURCC_AI44:
-         sampler_templ.swizzle_r = PIPE_SWIZZLE_ALPHA;
-         sampler_templ.swizzle_g = PIPE_SWIZZLE_ZERO;
-         sampler_templ.swizzle_b = PIPE_SWIZZLE_ZERO;
-         sampler_templ.swizzle_a = PIPE_SWIZZLE_RED;
-         break;
-
-      case FOURCC_IA44:
-         sampler_templ.swizzle_r = PIPE_SWIZZLE_RED;
-         sampler_templ.swizzle_g = PIPE_SWIZZLE_ZERO;
-         sampler_templ.swizzle_b = PIPE_SWIZZLE_ZERO;
-         sampler_templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
-         break;
-
-      default:
-         XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized Xv image ID 0x%08X.\n", xvimage_id);
-   }
+   XvIDToSwizzle(xvimage_id, &sampler_templ);
 
    subpicture_priv->sampler = vpipe->create_sampler_view(vpipe, tex, &sampler_templ);
    pipe_resource_reference(&tex, NULL);
-- 
cgit v1.2.3


From c7b65dcaffeb9d0760c8ecad052f4c79297bfc8a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 13 Apr 2011 18:50:18 +0200
Subject: xvmc: Define some Xv attribs to allow users to specify color standard
 and procamp

---
 src/gallium/auxiliary/vl/vl_csc.c                  |  18 +++-
 src/gallium/auxiliary/vl/vl_csc.h                  |  12 ++-
 src/gallium/state_trackers/xorg/xvmc/attributes.c  | 112 ++++++++++++++++++++-
 src/gallium/state_trackers/xorg/xvmc/context.c     |  11 +-
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   7 +-
 5 files changed, 142 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_csc.c b/src/gallium/auxiliary/vl/vl_csc.c
index 75159be80df..00eefa293a4 100644
--- a/src/gallium/auxiliary/vl/vl_csc.c
+++ b/src/gallium/auxiliary/vl/vl_csc.c
@@ -155,6 +155,13 @@ static const float identity[16] =
    0.0f, 0.0f, 0.0f, 1.0f
 };
 
+const struct vl_procamp vl_default_procamp = {
+   .contrast = 1.0f,
+   .saturation = 1.0f,
+   .brightness = 0.0f,
+   .hue = 0.0f
+};
+
 void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs,
                        struct vl_procamp *procamp,
                        bool full_range,
@@ -163,10 +170,13 @@ void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs,
    float ybias = full_range ? -16.0f/255.0f : 0.0f;
    float cbbias = -128.0f/255.0f;
    float crbias = -128.0f/255.0f;
-   float c = procamp ? procamp->contrast : 1.0f;
-   float s = procamp ? procamp->saturation : 1.0f;
-   float b = procamp ? procamp->brightness : 0.0f;
-   float h = procamp ? procamp->hue : 0.0f;
+
+   const struct vl_procamp *p = procamp ? procamp : &vl_default_procamp;
+   float c = p->contrast;
+   float s = p->saturation;
+   float b = p->brightness;
+   float h = p->hue;
+
    const float *cstd;
 
    assert(matrix);
diff --git a/src/gallium/auxiliary/vl/vl_csc.h b/src/gallium/auxiliary/vl/vl_csc.h
index 722ca35f339..9b73fb3aef2 100644
--- a/src/gallium/auxiliary/vl/vl_csc.h
+++ b/src/gallium/auxiliary/vl/vl_csc.h
@@ -1,8 +1,8 @@
 /**************************************************************************
- * 
+ *
  * Copyright 2009 Younes Manton.
  * All Rights Reserved.
- * 
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- * 
+ *
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- * 
+ *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,7 +22,7 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
+ *
  **************************************************************************/
 
 #ifndef vl_csc_h
@@ -45,6 +45,8 @@ enum VL_CSC_COLOR_STANDARD
    VL_CSC_COLOR_STANDARD_BT_709
 };
 
+extern const struct vl_procamp vl_default_procamp;
+
 void vl_csc_get_matrix(enum VL_CSC_COLOR_STANDARD cs,
                        struct vl_procamp *procamp,
                        bool full_range,
diff --git a/src/gallium/state_trackers/xorg/xvmc/attributes.c b/src/gallium/state_trackers/xorg/xvmc/attributes.c
index c1cea655241..06d5dc919b4 100644
--- a/src/gallium/state_trackers/xorg/xvmc/attributes.c
+++ b/src/gallium/state_trackers/xorg/xvmc/attributes.c
@@ -26,27 +26,131 @@
  **************************************************************************/
 
 #include <assert.h>
+#include <stdlib.h>
 
 #include <X11/Xlib.h>
 #include <X11/extensions/Xvlib.h>
 #include <X11/extensions/XvMClib.h>
 
-#include <pipe/p_compiler.h>
+#include <vl/vl_compositor.h>
+
+#include "xvmc_private.h"
+
+#define XV_BRIGHTNESS "XV_BRIGHTNESS"
+#define XV_CONTRAST   "XV_CONTRAST"
+#define XV_SATURATION "XV_SATURATION"
+#define XV_HUE        "XV_HUE"
+#define XV_COLORSPACE "XV_COLORSPACE"
+
+static const XvAttribute attributes[] = {
+   { XvGettable | XvSettable, -1000, 1000, XV_BRIGHTNESS },
+   { XvGettable | XvSettable, -1000, 1000, XV_CONTRAST },
+   { XvGettable | XvSettable, -1000, 1000, XV_SATURATION },
+   { XvGettable | XvSettable, -1000, 1000, XV_HUE },
+   { XvGettable | XvSettable, 0, 1, XV_COLORSPACE }
+};
 
 PUBLIC
 XvAttribute* XvMCQueryAttributes(Display *dpy, XvMCContext *context, int *number)
 {
-   return NULL;
+   XvMCContextPrivate *context_priv;
+   XvAttribute *result;
+
+   assert(dpy && number);
+
+   if (!context || !context->privData)
+      return NULL;
+
+   context_priv = context->privData;
+
+   result = malloc(sizeof(attributes));
+   if (!result)
+      return NULL;
+
+   memcpy(result, attributes, sizeof(attributes));
+   *number = sizeof(attributes) / sizeof(XvAttribute);
+
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Returning %d attributes for context %p.\n", *number, context);
+
+   return result;
 }
 
 PUBLIC
 Status XvMCSetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int value)
 {
-   return BadImplementation;
+   XvMCContextPrivate *context_priv;
+   const char *attr;
+   float csc[16];
+
+   assert(dpy);
+
+   if (!context || !context->privData)
+      return XvMCBadContext;
+
+   context_priv = context->privData;
+
+   attr = XGetAtomName(dpy, attribute);
+   if (!attr)
+      return XvMCBadContext;
+
+   if (strcmp(attr, XV_BRIGHTNESS))
+      context_priv->procamp.brightness = value / 1000.0f;
+   else if (strcmp(attr, XV_CONTRAST))
+      context_priv->procamp.contrast = value / 1000.0f + 1.0f;
+   else if (strcmp(attr, XV_SATURATION))
+      context_priv->procamp.saturation = value / 1000.0f + 1.0f;
+   else if (strcmp(attr, XV_HUE))
+      context_priv->procamp.hue = value / 1000.0f;
+   else if (strcmp(attr, XV_COLORSPACE))
+      context_priv->color_standard = value ?
+         VL_CSC_COLOR_STANDARD_BT_601 :
+         VL_CSC_COLOR_STANDARD_BT_709;
+   else
+      return BadName;
+
+   vl_csc_get_matrix
+   (
+      context_priv->color_standard,
+      &context_priv->procamp, true, csc
+   );
+   context_priv->compositor->set_csc_matrix(context_priv->compositor, csc);
+
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Set attribute %s to value %d.\n", attr, value);
+
+   return Success;
 }
 
 PUBLIC
 Status XvMCGetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int *value)
 {
-   return BadImplementation;
+   XvMCContextPrivate *context_priv;
+   const char *attr;
+
+   assert(dpy);
+
+   if (!context || !context->privData)
+      return XvMCBadContext;
+
+   context_priv = context->privData;
+
+   attr = XGetAtomName(dpy, attribute);
+   if (!attr)
+      return XvMCBadContext;
+
+   if (strcmp(attr, XV_BRIGHTNESS))
+      *value = context_priv->procamp.brightness * 1000;
+   else if (strcmp(attr, XV_CONTRAST))
+      *value = context_priv->procamp.contrast * 1000 - 1000;
+   else if (strcmp(attr, XV_SATURATION))
+      *value = context_priv->procamp.saturation * 1000 + 1000;
+   else if (strcmp(attr, XV_HUE))
+      *value = context_priv->procamp.hue * 1000;
+   else if (strcmp(attr, XV_COLORSPACE))
+      *value = context_priv->color_standard == VL_CSC_COLOR_STANDARD_BT_709;
+   else
+      return BadName;
+
+   XVMC_MSG(XVMC_TRACE, "[XvMC] Got value %d for attribute %s.\n", *value, attr);
+
+   return Success;
 }
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index b0338336ae8..f77dc0906bb 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -270,12 +270,15 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       return BadAlloc;
    }
 
-   /* TODO: Define some Xv attribs to allow users to specify color standard, procamp */
+   context_priv->color_standard =
+      debug_get_bool_option("G3DVL_NO_CSC", FALSE) ?
+      VL_CSC_COLOR_STANDARD_IDENTITY : VL_CSC_COLOR_STANDARD_BT_601;
+   context_priv->procamp = vl_default_procamp;
+
    vl_csc_get_matrix
    (
-      debug_get_bool_option("G3DVL_NO_CSC", FALSE) ?
-      VL_CSC_COLOR_STANDARD_IDENTITY : VL_CSC_COLOR_STANDARD_BT_601,
-      NULL, true, csc
+      context_priv->color_standard,
+      &context_priv->procamp, true, csc
    );
    context_priv->compositor->set_csc_matrix(context_priv->compositor, csc);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index b0239f4c46d..b902d7d2817 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -34,6 +34,8 @@
 #include <util/u_debug.h>
 #include <util/u_math.h>
 
+#include <vl/vl_csc.h>
+
 #define BLOCK_SIZE_SAMPLES 64
 #define BLOCK_SIZE_BYTES (BLOCK_SIZE_SAMPLES * 2)
 
@@ -53,6 +55,9 @@ typedef struct
    struct pipe_video_decoder *decoder;
    struct pipe_video_compositor *compositor;
 
+   enum VL_CSC_COLOR_STANDARD color_standard;
+   struct vl_procamp procamp;
+
    unsigned short subpicture_max_width;
    unsigned short subpicture_max_height;
 } XvMCContextPrivate;
@@ -107,7 +112,7 @@ static INLINE void XVMC_MSG(unsigned int level, const char *fmt, ...)
    static int debug_level = -1;
 
    if (debug_level == -1) {
-      debug_level = MIN2(debug_get_num_option("XVMC_DEBUG", 0), 0);
+      debug_level = MAX2(debug_get_num_option("XVMC_DEBUG", 0), 0);
    }
 
    if (level <= debug_level) {
-- 
cgit v1.2.3


From efaf024f8c7c1000af06e54a85378818d55c5160 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 13 Apr 2011 19:32:49 +0200
Subject: xvmc: use a pipe_video_rect for subpicture src & dst

---
 src/gallium/state_trackers/xorg/xvmc/subpicture.c   | 13 +++++--------
 src/gallium/state_trackers/xorg/xvmc/surface.c      |  7 +++----
 src/gallium/state_trackers/xorg/xvmc/xvmc_private.h |  9 +++++----
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 31b1a96512f..68519c08885 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -417,6 +417,9 @@ Status XvMCBlendSubpicture(Display *dpy, XvMCSurface *target_surface, XvMCSubpic
                            short subx, short suby, unsigned short subw, unsigned short subh,
                            short surfx, short surfy, unsigned short surfw, unsigned short surfh)
 {
+   struct pipe_video_rect src_rect = {subx, suby, subw, subh};
+   struct pipe_video_rect dst_rect = {surfx, surfy, surfw, surfh};
+
    XvMCSurfacePrivate *surface_priv;
    XvMCSubpicturePrivate *subpicture_priv;
 
@@ -439,16 +442,10 @@ Status XvMCBlendSubpicture(Display *dpy, XvMCSurface *target_surface, XvMCSubpic
    subpicture_priv = subpicture->privData;
 
    /* TODO: Assert rects are within bounds? Or clip? */
+   subpicture_priv->src_rect = src_rect;
+   subpicture_priv->dst_rect = dst_rect;
 
    surface_priv->subpicture = subpicture;
-   surface_priv->subx = subx;
-   surface_priv->suby = suby;
-   surface_priv->subw = subw;
-   surface_priv->subh = subh;
-   surface_priv->surfx = surfx;
-   surface_priv->surfy = surfy;
-   surface_priv->surfw = surfw;
-   surface_priv->surfh = surfh;
    subpicture_priv->surface = target_surface;
 
    return Success;
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index c8c8638e581..f22d315c90d 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -431,14 +431,13 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    compositor->set_buffer_layer(compositor, 0, surface_priv->video_buffer, &src_rect, NULL);
 
    if (subpicture_priv) {
-      struct pipe_video_rect src_rect = {surface_priv->subx, surface_priv->suby, surface_priv->subw, surface_priv->subh};
-      struct pipe_video_rect dst_rect = {surface_priv->surfx, surface_priv->surfy, surface_priv->surfw, surface_priv->surfh};
-
       XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p has subpicture %p.\n", surface, surface_priv->subpicture);
 
       assert(subpicture_priv->surface == surface);
+
       if (subpicture_priv->palette)
-         compositor->set_palette_layer(compositor, 1, subpicture_priv->sampler, subpicture_priv->palette, &src_rect, &dst_rect);
+         compositor->set_palette_layer(compositor, 1, subpicture_priv->sampler, subpicture_priv->palette,
+                                       &subpicture_priv->src_rect, &subpicture_priv->dst_rect);
       else
          compositor->set_rgba_layer(compositor, 1, subpicture_priv->sampler, &src_rect, &dst_rect);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index b902d7d2817..056bdfc2f3c 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -31,6 +31,8 @@
 #include <X11/Xlib.h>
 #include <X11/extensions/XvMClib.h>
 
+#include <pipe/p_video_state.h>
+
 #include <util/u_debug.h>
 #include <util/u_math.h>
 
@@ -77,10 +79,6 @@ typedef struct
 
    /* The subpicture associated with this surface, if any. */
    XvMCSubpicture *subpicture;
-   short subx, suby;
-   unsigned short subw, subh;
-   short surfx, surfy;
-   unsigned short surfw, surfh;
 
    /* Some XvMC functions take a surface but not a context,
       so we keep track of which context each surface belongs to. */
@@ -94,6 +92,9 @@ typedef struct
    /* optional palette for this subpicture */
    struct pipe_sampler_view *palette;
 
+   struct pipe_video_rect src_rect;
+   struct pipe_video_rect dst_rect;
+
    /* The surface this subpicture is currently associated with, if any. */
    XvMCSurface *surface;
 
-- 
cgit v1.2.3


From 751eb75310f62bd9b7ef18df1730958a467c99ad Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 13 Apr 2011 20:07:21 +0200
Subject: [g3dvl] move intra handling into fetch_ycbcr

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 71 ++++++++++++------------
 1 file changed, 35 insertions(+), 36 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 0f9df6b4d32..c01d1255804 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -241,12 +241,17 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 
       ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
 
+      ureg_CMP(shader, t_tc, ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
+               ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 0.5f));
+
+      if (scale != 1.0f)
+         ureg_MAD(shader, texel, ureg_src(texel), ureg_imm1f(shader, scale), ureg_src(t_tc));
+      else
+         ureg_ADD(shader, texel, ureg_src(texel), ureg_src(t_tc));
+
    ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
 
-   if (scale != 1.0f)
-      ureg_MUL(shader, texel, ureg_src(texel), ureg_imm1f(shader, scale));
-
    ureg_release_temporary(shader, t_tc);
 
    return texel;
@@ -257,7 +262,7 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 {
    struct ureg_src tc[2][2], sampler[2];
    struct ureg_dst ref[2], result;
-   unsigned i, intra_label;
+   unsigned i;
 
    tc[0][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_TOP, TGSI_INTERPOLATE_LINEAR);
    tc[0][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_BOTTOM, TGSI_INTERPOLATE_LINEAR);
@@ -271,38 +276,32 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
 
    result = ureg_DECL_temporary(shader);
 
-   ureg_MOV(shader, result, ureg_imm1f(shader, 0.5f));
-
-   ureg_IF(shader, ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z), &intra_label);
-      /*
-       * if (field.z)
-       *    ref[0..1] = tex(tc[0..1], sampler[0..1])
-       * else
-       *    ref[0..1] = tex(tc[2..3], sampler[0..1])
-       * result = LRP(info.y, ref[0..1])
-       */
-      ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
-               ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-               tc[0][1], tc[0][0]);
-      ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
-               ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-               tc[1][1], tc[1][0]);
-
-      ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
-      ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
-
-      ureg_LRP(shader, ref[0],
-               ureg_scalar(tc[0][0], TGSI_SWIZZLE_Z),
-               ureg_src(ref[0]), ureg_imm1f(shader, 0.0f));
-
-      ureg_LRP(shader, ref[1],
-               ureg_scalar(tc[1][0], TGSI_SWIZZLE_Z),
-               ureg_src(ref[1]), ureg_imm1f(shader, 0.0f));
-
-      ureg_ADD(shader, result, ureg_src(ref[0]), ureg_src(ref[1]));
-
-   ureg_fixup_label(shader, intra_label, ureg_get_instruction_number(shader));
-   ureg_ENDIF(shader);
+   /*
+    * if (field.z)
+    *    ref[0..1] = tex(tc[0..1], sampler[0..1])
+    * else
+    *    ref[0..1] = tex(tc[2..3], sampler[0..1])
+    * result = LRP(info.y, ref[0..1])
+    */
+   ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[0][1], tc[0][0]);
+   ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1][1], tc[1][0]);
+
+   ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
+   ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
+
+   ureg_LRP(shader, ref[0],
+            ureg_scalar(tc[0][0], TGSI_SWIZZLE_Z),
+            ureg_src(ref[0]), ureg_imm1f(shader, 0.0f));
+
+   ureg_LRP(shader, ref[1],
+            ureg_scalar(tc[1][0], TGSI_SWIZZLE_Z),
+            ureg_src(ref[1]), ureg_imm1f(shader, 0.0f));
+
+   ureg_ADD(shader, result, ureg_src(ref[0]), ureg_src(ref[1]));
 
    for (i = 0; i < 2; ++i)
       ureg_release_temporary(shader, ref[i]);
-- 
cgit v1.2.3


From b88fa924009b5cc572187d3ca6a395d5226aa1c3 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 13 Apr 2011 22:45:54 +0200
Subject: [g3dvl] split mc into separate stages

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c     |   5 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 150 +++++++++++++++--------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   3 +-
 3 files changed, 102 insertions(+), 56 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index ce0393848e3..669d082f873 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -455,7 +455,6 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    struct pipe_sampler_view **sv_future;
    struct pipe_surface **surfaces;
 
-   struct pipe_sampler_view *sv_refs[2];
    unsigned ne_start, ne_num, e_start, e_num;
    unsigned i;
 
@@ -472,9 +471,11 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
 
    dec->pipe->set_vertex_buffers(dec->pipe, 2, buf->vertex_bufs.all);
-   dec->pipe->bind_blend_state(dec->pipe, dec->blend);
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
+      struct pipe_sampler_view *sv_refs[2];
+
+      dec->pipe->bind_blend_state(dec->pipe, dec->blend);
       dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves[i]);
 
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index c01d1255804..1d915908042 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -209,21 +209,31 @@ calc_field(struct ureg_program *shader)
    return tmp;
 }
 
-static struct ureg_dst
-fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct ureg_dst field, float scale)
+static void *
+create_ycbcr_frag_shader(struct vl_mpeg12_mc_renderer *r, float scale)
 {
+   struct ureg_program *shader;
    struct ureg_src tc[2], sampler;
-   struct ureg_dst texel, t_tc;
+   struct ureg_dst texel, t_tc, field;
+   struct ureg_dst fragment;
    unsigned label;
 
-   texel = ureg_DECL_temporary(shader);
-   t_tc = ureg_DECL_temporary(shader);
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
 
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_TOP, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_BOTTOM, TGSI_INTERPOLATE_LINEAR);
 
    sampler = ureg_DECL_sampler(shader, 0);
 
+   t_tc = ureg_DECL_temporary(shader);
+   texel = ureg_DECL_temporary(shader);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   field = calc_field(shader);
+
    /*
     * texel.y  = tex(field.y ? tc[1] : tc[0], sampler[0])
     * texel.cb = tex(tc[2], sampler[1])
@@ -236,7 +246,7 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
 
    ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f));
 
-   ureg_MOV(shader, ureg_writemask(texel, TGSI_WRITEMASK_XYZ), ureg_imm1f(shader, 0.0f));
+   ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
    ureg_IF(shader, ureg_scalar(ureg_src(t_tc), TGSI_SWIZZLE_Z), &label);
 
       ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
@@ -245,25 +255,32 @@ fetch_ycbcr(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader, struct
                ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 0.5f));
 
       if (scale != 1.0f)
-         ureg_MAD(shader, texel, ureg_src(texel), ureg_imm1f(shader, scale), ureg_src(t_tc));
+         ureg_MAD(shader, fragment, ureg_src(texel), ureg_imm1f(shader, scale), ureg_src(t_tc));
       else
-         ureg_ADD(shader, texel, ureg_src(texel), ureg_src(t_tc));
+         ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(t_tc));
 
    ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
 
    ureg_release_temporary(shader, t_tc);
+   ureg_release_temporary(shader, texel);
 
-   return texel;
+   return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static struct ureg_dst
-fetch_ref(struct ureg_program *shader, struct ureg_dst field)
+static void *
+create_ref_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
+   struct ureg_program *shader;
    struct ureg_src tc[2][2], sampler[2];
-   struct ureg_dst ref[2], result;
+   struct ureg_dst ref[2], field;
+   struct ureg_dst fragment;
    unsigned i;
 
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
    tc[0][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_TOP, TGSI_INTERPOLATE_LINEAR);
    tc[0][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_BOTTOM, TGSI_INTERPOLATE_LINEAR);
    tc[1][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_TOP, TGSI_INTERPOLATE_LINEAR);
@@ -274,7 +291,9 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
       ref[i] = ureg_DECL_temporary(shader);
    }
 
-   result = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   field = calc_field(shader);
 
    /*
     * if (field.z)
@@ -301,38 +320,12 @@ fetch_ref(struct ureg_program *shader, struct ureg_dst field)
             ureg_scalar(tc[1][0], TGSI_SWIZZLE_Z),
             ureg_src(ref[1]), ureg_imm1f(shader, 0.0f));
 
-   ureg_ADD(shader, result, ureg_src(ref[0]), ureg_src(ref[1]));
+   ureg_ADD(shader, fragment, ureg_src(ref[0]), ureg_src(ref[1]));
 
    for (i = 0; i < 2; ++i)
       ureg_release_temporary(shader, ref[i]);
 
-   return result;
-}
-
-static void *
-create_frag_shader(struct vl_mpeg12_mc_renderer *r, float scale)
-{
-   struct ureg_program *shader;
-   struct ureg_dst result;
-   struct ureg_dst field, texel;
-   struct ureg_dst fragment;
-
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
-
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
-   field = calc_field(shader);
-   texel = fetch_ycbcr(r, shader, field, scale);
-
-   result = fetch_ref(shader, field);
-
-   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(result));
-
    ureg_release_temporary(shader, field);
-   ureg_release_temporary(shader, texel);
-   ureg_release_temporary(shader, result);
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, r->pipe);
@@ -342,6 +335,7 @@ static bool
 init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
    struct pipe_sampler_state sampler;
+   struct pipe_blend_state blend;
    struct pipe_rasterizer_state rs_state;
    unsigned filters[3];
    unsigned i;
@@ -390,6 +384,28 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
          goto error_samplers;
    }
 
+   memset(&blend, 0, sizeof blend);
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 0;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   r->blend_clear = r->pipe->create_blend_state(r->pipe, &blend);
+   if (!r->blend_clear)
+      goto error_blend_clear;
+
+   blend.rt[0].blend_enable = 1;
+   r->blend_add = r->pipe->create_blend_state(r->pipe, &blend);
+   if (!r->blend_add)
+      goto error_blend_add;
+
    memset(&rs_state, 0, sizeof(rs_state));
    /*rs_state.sprite_coord_enable */
    rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
@@ -398,10 +414,17 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    rs_state.gl_rasterization_rules = true;
    r->rs_state = r->pipe->create_rasterizer_state(r->pipe, &rs_state);
    if (!r->rs_state)
-      goto error_samplers;
+      goto error_rs_state;
 
    return true;
 
+error_rs_state:
+   r->pipe->delete_blend_state(r->pipe, r->blend_add);
+
+error_blend_add:
+   r->pipe->delete_blend_state(r->pipe, r->blend_clear);
+
+error_blend_clear:
 error_samplers:
    for (i = 0; i < 5; ++i)
       r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
@@ -419,6 +442,8 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
    for (i = 0; i < 3; ++i)
       r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
 
+   r->pipe->delete_blend_state(r->pipe, r->blend_clear);
+   r->pipe->delete_blend_state(r->pipe, r->blend_add);
    r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
 }
 
@@ -448,9 +473,13 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    if (!renderer->vs)
       goto error_vs_shaders;
 
-   renderer->fs = create_frag_shader(renderer, scale);
-   if (!renderer->fs)
-      goto error_fs_shaders;
+   renderer->fs_ref = create_ref_frag_shader(renderer);
+   if (!renderer->fs_ref)
+      goto error_fs_ref_shaders;
+
+   renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale);
+   if (!renderer->fs_ycbcr)
+      goto error_fs_ycbcr_shaders;
 
    /* create a dummy sampler */
    memset(&tex_templ, 0, sizeof(tex_templ));
@@ -479,9 +508,12 @@ vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
    return true;
 
 error_dummy:
-   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs);
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
+
+error_fs_ycbcr_shaders:
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
 
-error_fs_shaders:
+error_fs_ref_shaders:
    renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs);
 
 error_vs_shaders:
@@ -501,7 +533,8 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
    cleanup_pipe_state(renderer);
 
    renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs);
-   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs);
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
 }
 
 bool
@@ -547,7 +580,6 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    renderer->fb_state.height = surface->height;
    renderer->fb_state.cbufs[0] = surface;
 
-
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
@@ -562,15 +594,27 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 3, renderer->samplers.all);
 
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
-   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs);
+
+   renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear);
+   if (ref[0] || ref[1]) {
+      renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref);
+
+      if (not_empty_num_instances > 0)
+         util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
+                                    not_empty_start_instance, not_empty_num_instances);
+
+      if (empty_num_instances > 0)
+         util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
+                                    empty_start_instance, empty_num_instances);
+
+      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add);
+   }
+
+   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
 
    if (not_empty_num_instances > 0)
       util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
                                  not_empty_start_instance, not_empty_num_instances);
 
-   if (empty_num_instances > 0)
-      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                                 empty_start_instance, empty_num_instances);
-
    renderer->pipe->flush(renderer->pipe, fence);
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index d4e49216229..afd94bf6138 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -46,7 +46,8 @@ struct vl_mpeg12_mc_renderer
 
    void *rs_state;
 
-   void *vs, *fs;
+   void *blend_clear, *blend_add;
+   void *vs, *fs_ref, *fs_ycbcr;
 
    union
    {
-- 
cgit v1.2.3


From e61a63a65198fe136ff9f1b6b8897c5b3c95b093 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 14 Apr 2011 00:40:24 +0200
Subject: [g3dvl] cleanup blender and sampler views

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 120 ++++++++++-------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  13 +--
 2 files changed, 56 insertions(+), 77 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 1d915908042..0ffb76c6b1c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -246,7 +246,7 @@ create_ycbcr_frag_shader(struct vl_mpeg12_mc_renderer *r, float scale)
 
    ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f));
 
-   ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
+   ureg_MOV(shader, fragment, ureg_imm4f(shader, 0.0f, 0.0f, 0.0f, 1.0f));
    ureg_IF(shader, ureg_scalar(ureg_src(t_tc), TGSI_SWIZZLE_Z), &label);
 
       ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
@@ -255,9 +255,11 @@ create_ycbcr_frag_shader(struct vl_mpeg12_mc_renderer *r, float scale)
                ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 0.5f));
 
       if (scale != 1.0f)
-         ureg_MAD(shader, fragment, ureg_src(texel), ureg_imm1f(shader, scale), ureg_src(t_tc));
+         ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+                  ureg_src(texel), ureg_imm1f(shader, scale), ureg_src(t_tc));
       else
-         ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(t_tc));
+         ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+                  ureg_src(texel), ureg_src(t_tc));
 
    ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
@@ -287,7 +289,7 @@ create_ref_frag_shader(struct vl_mpeg12_mc_renderer *r)
    tc[1][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_BOTTOM, TGSI_INTERPOLATE_LINEAR);
 
    for (i = 0; i < 2; ++i) {
-      sampler[i] = ureg_DECL_sampler(shader, i + 1);
+      sampler[i] = ureg_DECL_sampler(shader, i);
       ref[i] = ureg_DECL_temporary(shader);
    }
 
@@ -320,7 +322,8 @@ create_ref_frag_shader(struct vl_mpeg12_mc_renderer *r)
             ureg_scalar(tc[1][0], TGSI_SWIZZLE_Z),
             ureg_src(ref[1]), ureg_imm1f(shader, 0.0f));
 
-   ureg_ADD(shader, fragment, ureg_src(ref[0]), ureg_src(ref[1]));
+   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(ref[0]), ureg_src(ref[1]));
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
 
    for (i = 0; i < 2; ++i)
       ureg_release_temporary(shader, ref[i]);
@@ -337,8 +340,6 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    struct pipe_sampler_state sampler;
    struct pipe_blend_state blend;
    struct pipe_rasterizer_state rs_state;
-   unsigned filters[3];
-   unsigned i;
 
    assert(r);
 
@@ -352,47 +353,33 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    r->fb_state.nr_cbufs = 1;
    r->fb_state.zsbuf = NULL;
 
-   /* source filter */
-   filters[0] = PIPE_TEX_FILTER_NEAREST;
-
-   /* Fwd, bkwd ref filters */
-   filters[1] = PIPE_TEX_FILTER_LINEAR;
-   filters[2] = PIPE_TEX_FILTER_LINEAR;
-
-   for (i = 0; i < 3; ++i) {
-      memset(&sampler, 0, sizeof(sampler));
-      sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-      sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
-      sampler.min_img_filter = filters[i];
-      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-      sampler.mag_img_filter = filters[i];
-      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
-      sampler.compare_func = PIPE_FUNC_ALWAYS;
-      sampler.normalized_coords = 1;
-      /*sampler.shadow_ambient = ; */
-      /*sampler.lod_bias = ; */
-      sampler.min_lod = 0;
-      /*sampler.max_lod = ; */
-      sampler.border_color[0] = 0.0f;
-      sampler.border_color[1] = 0.0f;
-      sampler.border_color[2] = 0.0f;
-      sampler.border_color[3] = 0.0f;
-      /*sampler.max_anisotropy = ; */
-      r->samplers.all[i] = r->pipe->create_sampler_state(r->pipe, &sampler);
-      if (!r->samplers.all[i])
-         goto error_samplers;
-   }
+   memset(&sampler, 0, sizeof(sampler));
+   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+   sampler.compare_func = PIPE_FUNC_ALWAYS;
+   sampler.normalized_coords = 1;
+   r->sampler_ref = r->pipe->create_sampler_state(r->pipe, &sampler);
+   if (!r->sampler_ref)
+      goto error_sampler_ref;
+
+   r->sampler_ycbcr = r->pipe->create_sampler_state(r->pipe, &sampler);
+   if (!r->sampler_ycbcr)
+      goto error_sampler_ycbcr;
 
    memset(&blend, 0, sizeof blend);
    blend.independent_blend_enable = 0;
-   blend.rt[0].blend_enable = 0;
+   blend.rt[0].blend_enable = 1;
    blend.rt[0].rgb_func = PIPE_BLEND_ADD;
-   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
    blend.rt[0].alpha_func = PIPE_BLEND_ADD;
-   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
    blend.logicop_enable = 0;
    blend.logicop_func = PIPE_LOGICOP_CLEAR;
    blend.rt[0].colormask = PIPE_MASK_RGBA;
@@ -401,7 +388,8 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    if (!r->blend_clear)
       goto error_blend_clear;
 
-   blend.rt[0].blend_enable = 1;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
    r->blend_add = r->pipe->create_blend_state(r->pipe, &blend);
    if (!r->blend_add)
       goto error_blend_add;
@@ -425,23 +413,22 @@ error_blend_add:
    r->pipe->delete_blend_state(r->pipe, r->blend_clear);
 
 error_blend_clear:
-error_samplers:
-   for (i = 0; i < 5; ++i)
-      r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
+   r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
 
+error_sampler_ref:
+   r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr);
+
+error_sampler_ycbcr:
    return false;
 }
 
 static void
 cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
 {
-   unsigned i;
-
    assert(r);
 
-   for (i = 0; i < 3; ++i)
-      r->pipe->delete_sampler_state(r->pipe, r->samplers.all[i]);
-
+   r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
+   r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr);
    r->pipe->delete_blend_state(r->pipe, r->blend_clear);
    r->pipe->delete_blend_state(r->pipe, r->blend_add);
    r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
@@ -544,7 +531,7 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
    assert(renderer && buffer);
    assert(source);
 
-   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
+   pipe_sampler_view_reference(&buffer->source, source);
 
    return true;
 }
@@ -552,12 +539,9 @@ vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg1
 void
 vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
 {
-   unsigned i;
-
    assert(buffer);
 
-   for (i = 0; i < 3; ++i)
-      pipe_sampler_view_reference(&buffer->sampler_views.all[i], NULL);
+   pipe_sampler_view_reference(&buffer->source, NULL);
 }
 
 void
@@ -584,21 +568,23 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
 
-   /* if no reference frame provided use a dummy sampler instead */
-   pipe_sampler_view_reference(&buffer->sampler_views.individual.ref[0],
-                               ref[0] ? ref[0] : renderer->dummy);
-   pipe_sampler_view_reference(&buffer->sampler_views.individual.ref[1],
-                               ref[1] ? ref[1] : renderer->dummy);
-
-   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 3, buffer->sampler_views.all);
-   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 3, renderer->samplers.all);
-
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
 
    renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear);
    if (ref[0] || ref[1]) {
+      void *samplers[2];
+
       renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref);
 
+      /* if no reference frame provided use a dummy sampler instead */
+      if (!ref[0]) ref[0] = renderer->dummy;
+      if (!ref[1]) ref[1] = renderer->dummy;
+
+      renderer->pipe->set_fragment_sampler_views(renderer->pipe, 2, ref);
+
+      samplers[0] = samplers[1] = renderer->sampler_ref;
+      renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 2, samplers);
+
       if (not_empty_num_instances > 0)
          util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
                                     not_empty_start_instance, not_empty_num_instances);
@@ -610,6 +596,8 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
       renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add);
    }
 
+   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &buffer->source);
+   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
 
    if (not_empty_num_instances > 0)
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index afd94bf6138..f71bca5e821 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -48,23 +48,14 @@ struct vl_mpeg12_mc_renderer
 
    void *blend_clear, *blend_add;
    void *vs, *fs_ref, *fs_ycbcr;
-
-   union
-   {
-      void *all[3];
-      struct { void *source, *ref[2]; } individual;
-   } samplers;
+   void *sampler_ref, *sampler_ycbcr;
 
    struct pipe_sampler_view *dummy;
 };
 
 struct vl_mpeg12_mc_buffer
 {
-   union
-   {
-      struct pipe_sampler_view *all[3];
-      struct { struct pipe_sampler_view *source, *ref[2]; } individual;
-   } sampler_views;
+   struct pipe_sampler_view *source;
 };
 
 bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
-- 
cgit v1.2.3


From 10c49b28752f5f2d822dfb1e2e6a1ec213cc44da Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 14 Apr 2011 22:31:40 +0200
Subject: [g3dvl] use blending for mc of ref frames

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c     |  67 +++++----
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h     |   3 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 177 ++++++++++-------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  30 ++--
 src/gallium/auxiliary/vl/vl_types.h              |   5 +
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     |  44 +++---
 src/gallium/auxiliary/vl/vl_vertex_buffers.h     |  10 +-
 7 files changed, 171 insertions(+), 165 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 669d082f873..906be3775c4 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -193,6 +193,8 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
    struct vl_mpeg12_decoder *dec;
+   unsigned i;
+
    assert(buf);
 
    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
@@ -203,9 +205,8 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
 
    buf->mc_source->destroy(buf->mc_source);
    vl_vb_cleanup(&buf->vertex_stream);
-   vl_mpeg12_mc_cleanup_buffer(&buf->mc[0]);
-   vl_mpeg12_mc_cleanup_buffer(&buf->mc[1]);
-   vl_mpeg12_mc_cleanup_buffer(&buf->mc[2]);
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      vl_mc_cleanup_buffer(&buf->mc[i]);
 
    FREE(buf);
 }
@@ -267,6 +268,7 @@ static void
 vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 {
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
+   unsigned i;
 
    assert(decoder);
 
@@ -277,14 +279,17 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    dec->pipe->delete_blend_state(dec->pipe, dec->blend);
    dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa);
 
-   vl_mpeg12_mc_renderer_cleanup(&dec->mc);
+   vl_mc_cleanup(&dec->mc);
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
       vl_idct_cleanup(&dec->idct_y);
       vl_idct_cleanup(&dec->idct_c);
    }
-   dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves[0]);
-   dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves[1]);
-   dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves[2]);
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_eb[i]);
+
+   for (i = 0; i < 2; ++i)
+      dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv[i]);
+
    pipe_resource_reference(&dec->quads.buffer, NULL);
 
    FREE(dec);
@@ -409,22 +414,22 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    if (!mc_source_sv)
       goto error_mc_source_sv;
 
-   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[0], mc_source_sv[0]))
+   if(!vl_mc_init_buffer(&dec->mc, &buffer->mc[0], mc_source_sv[0]))
       goto error_mc_y;
 
-   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[1], mc_source_sv[1]))
+   if(!vl_mc_init_buffer(&dec->mc, &buffer->mc[1], mc_source_sv[1]))
       goto error_mc_cb;
 
-   if(!vl_mpeg12_mc_init_buffer(&dec->mc, &buffer->mc[2], mc_source_sv[2]))
+   if(!vl_mc_init_buffer(&dec->mc, &buffer->mc[2], mc_source_sv[2]))
       goto error_mc_cr;
 
    return &buffer->base;
 
 error_mc_cr:
-   vl_mpeg12_mc_cleanup_buffer(&buffer->mc[1]);
+   vl_mc_cleanup_buffer(&buffer->mc[1]);
 
 error_mc_cb:
-   vl_mpeg12_mc_cleanup_buffer(&buffer->mc[0]);
+   vl_mc_cleanup_buffer(&buffer->mc[0]);
 
 error_mc_y:
 error_mc_source_sv:
@@ -451,20 +456,19 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
    struct vl_mpeg12_decoder *dec;
 
-   struct pipe_sampler_view **sv_past;
-   struct pipe_sampler_view **sv_future;
+   struct pipe_sampler_view **sv[2];
    struct pipe_surface **surfaces;
 
    unsigned ne_start, ne_num, e_start, e_num;
-   unsigned i;
+   unsigned i, j;
 
    assert(buf);
 
    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
    assert(dec);
 
-   sv_past = refs[0] ? refs[0]->get_sampler_views(refs[0]) : NULL;
-   sv_future = refs[1] ? refs[1]->get_sampler_views(refs[1]) : NULL;
+   for (i = 0; i < 2; ++i)
+      sv[i] = refs[i] ? refs[i]->get_sampler_views(refs[i]) : NULL;
 
    surfaces = dst->get_surfaces(dst);
 
@@ -473,20 +477,28 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    dec->pipe->set_vertex_buffers(dec->pipe, 2, buf->vertex_bufs.all);
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      struct pipe_sampler_view *sv_refs[2];
+      bool first = true;
+
+      vl_mc_set_surface(&dec->mc, surfaces[i]);
+
+      for (j = 0; j < 2; ++j) {
+         if (sv[j] == NULL) continue;
+
+         dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv[j]);
+         vl_mc_render_ref(&buf->mc[i], sv[j][i], first, ne_start, ne_num, e_start, e_num);
+         first = false;
+      }
 
       dec->pipe->bind_blend_state(dec->pipe, dec->blend);
-      dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves[i]);
+      dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_eb[i]);
 
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
          vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], ne_num);
 
-      sv_refs[0] = sv_past ? sv_past[i] : NULL;
-      sv_refs[1] = sv_future ? sv_future[i] : NULL;
+      vl_mc_render_ycbcr(&buf->mc[i], first, ne_start, ne_num);
 
-      vl_mpeg12_mc_renderer_flush(&dec->mc, &buf->mc[i], surfaces[i], sv_refs,
-                                  ne_start, ne_num, e_start, e_num, fence);
    }
+   dec->pipe->flush(dec->pipe, fence);
 }
 
 static void
@@ -703,7 +715,10 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
 
    dec->quads = vl_vb_upload_quads(dec->pipe, 2, 2);
    for (i = 0; i < VL_MAX_PLANES; ++i)
-      dec->ves[i] = vl_vb_get_elems_state(dec->pipe, i);
+      dec->ves_eb[i] = vl_vb_get_elems_state(dec->pipe, i, 0);
+
+   for (i = 0; i < 2; ++i)
+      dec->ves_mv[i] = vl_vb_get_elems_state(dec->pipe, 0, i);
 
    dec->base.width = align(width, MACROBLOCK_WIDTH);
    dec->base.height = align(height, MACROBLOCK_HEIGHT);
@@ -741,7 +756,7 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
       }
    }
 
-   if (!vl_mpeg12_mc_renderer_init(&dec->mc, dec->pipe, dec->base.width, dec->base.height, mc_scale))
+   if (!vl_mc_init(&dec->mc, dec->pipe, dec->base.width, dec->base.height, mc_scale))
       goto error_mc;
 
    if (!init_pipe_state(dec))
@@ -750,7 +765,7 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    return &dec->base;
 
 error_pipe_state:
-   vl_mpeg12_mc_renderer_cleanup(&dec->mc);
+   vl_mc_cleanup(&dec->mc);
 
 error_mc:
    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 25048e8543c..c27197f6664 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -52,7 +52,8 @@ struct vl_mpeg12_decoder
    enum pipe_format mc_source_format;
 
    struct pipe_vertex_buffer quads;
-   void *ves[VL_MAX_PLANES];
+   void *ves_eb[VL_MAX_PLANES];
+   void *ves_mv[2];
 
    struct vl_idct idct_y, idct_c;
    struct vl_mpeg12_mc_renderer mc;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 0ffb76c6b1c..dde7846ffb2 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -44,10 +44,8 @@ enum VS_OUTPUT
    VS_O_LINE,
    VS_O_TEX_TOP,
    VS_O_TEX_BOTTOM,
-   VS_O_MV0_TOP,
-   VS_O_MV0_BOTTOM,
-   VS_O_MV1_TOP,
-   VS_O_MV1_BOTTOM
+   VS_O_MV_TOP,
+   VS_O_MV_BOTTOM
 };
 
 static void *
@@ -55,10 +53,10 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
    struct ureg_src block_scale, mv_scale;
-   struct ureg_src vrect, vpos, eb, flags, vmv[2][2];
+   struct ureg_src vrect, vpos, eb, flags, vmv[2];
    struct ureg_dst t_vpos, t_vtex, t_vmv;
-   struct ureg_dst o_vpos, o_line, o_vtex[2], o_vmv[2][2];
-   unsigned i, j, label;
+   struct ureg_dst o_vpos, o_line, o_vtex[2], o_vmv[2];
+   unsigned i, label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
@@ -72,19 +70,15 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
    eb = ureg_DECL_vs_input(shader, VS_I_EB);
    flags = ureg_DECL_vs_input(shader, VS_I_FLAGS);
-   vmv[0][0] = ureg_DECL_vs_input(shader, VS_I_MV0_TOP);
-   vmv[0][1] = ureg_DECL_vs_input(shader, VS_I_MV0_BOTTOM);
-   vmv[1][0] = ureg_DECL_vs_input(shader, VS_I_MV1_TOP);
-   vmv[1][1] = ureg_DECL_vs_input(shader, VS_I_MV1_BOTTOM);
+   vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
+   vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
    o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
    o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_TOP);
    o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_BOTTOM);
-   o_vmv[0][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_TOP);
-   o_vmv[0][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_BOTTOM);
-   o_vmv[1][0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_TOP);
-   o_vmv[1][1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_BOTTOM);
+   o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV_TOP);
+   o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV_BOTTOM);
 
    /*
     * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
@@ -125,20 +119,21 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
       (float)MACROBLOCK_WIDTH / r->buffer_width,
       (float)MACROBLOCK_HEIGHT / r->buffer_height);
 
-   mv_scale = ureg_imm2f(shader,
+   mv_scale = ureg_imm4f(shader,
       0.5f / r->buffer_width,
-      0.5f / r->buffer_height);
+      0.5f / r->buffer_height,
+      1.0f,
+      1.0f / 255.0f);
 
    ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
    ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
 
-   for (i = 0; i < 2; ++i)
-      for (j = 0; j < 2; ++j) {
-         ureg_MAD(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_XY), mv_scale, vmv[i][j], ureg_src(t_vpos));
-         ureg_MOV(shader, ureg_writemask(o_vmv[i][j], TGSI_WRITEMASK_Z), ureg_scalar(flags, TGSI_SWIZZLE_Z + i));
-      }
+   for (i = 0; i < 2; ++i) {
+      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
+      ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_W), mv_scale, vmv[i]);
+   }
 
    ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_CMP(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_Z),
@@ -274,24 +269,19 @@ static void *
 create_ref_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src tc[2][2], sampler[2];
-   struct ureg_dst ref[2], field;
+   struct ureg_src tc[2], sampler;
+   struct ureg_dst ref, field;
    struct ureg_dst fragment;
-   unsigned i;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return NULL;
 
-   tc[0][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_TOP, TGSI_INTERPOLATE_LINEAR);
-   tc[0][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV0_BOTTOM, TGSI_INTERPOLATE_LINEAR);
-   tc[1][0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_TOP, TGSI_INTERPOLATE_LINEAR);
-   tc[1][1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV1_BOTTOM, TGSI_INTERPOLATE_LINEAR);
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV_TOP, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV_BOTTOM, TGSI_INTERPOLATE_LINEAR);
 
-   for (i = 0; i < 2; ++i) {
-      sampler[i] = ureg_DECL_sampler(shader, i);
-      ref[i] = ureg_DECL_temporary(shader);
-   }
+   sampler = ureg_DECL_sampler(shader, 0);
+   ref = ureg_DECL_temporary(shader);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -304,29 +294,12 @@ create_ref_frag_shader(struct vl_mpeg12_mc_renderer *r)
     *    ref[0..1] = tex(tc[2..3], sampler[0..1])
     * result = LRP(info.y, ref[0..1])
     */
-   ureg_CMP(shader, ureg_writemask(ref[0], TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-            tc[0][1], tc[0][0]);
-   ureg_CMP(shader, ureg_writemask(ref[1], TGSI_WRITEMASK_XY),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-            tc[1][1], tc[1][0]);
+   ureg_CMP(shader, ref, ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), tc[1], tc[0]);
 
-   ureg_TEX(shader, ref[0], TGSI_TEXTURE_2D, ureg_src(ref[0]), sampler[0]);
-   ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, ureg_src(ref[1]), sampler[1]);
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(ref));
+   ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler);
 
-   ureg_LRP(shader, ref[0],
-            ureg_scalar(tc[0][0], TGSI_SWIZZLE_Z),
-            ureg_src(ref[0]), ureg_imm1f(shader, 0.0f));
-
-   ureg_LRP(shader, ref[1],
-            ureg_scalar(tc[1][0], TGSI_SWIZZLE_Z),
-            ureg_src(ref[1]), ureg_imm1f(shader, 0.0f));
-
-   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(ref[0]), ureg_src(ref[1]));
-   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
-
-   for (i = 0; i < 2; ++i)
-      ureg_release_temporary(shader, ref[i]);
+   ureg_release_temporary(shader, ref);
 
    ureg_release_temporary(shader, field);
    ureg_END(shader);
@@ -435,11 +408,8 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
 }
 
 bool
-vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
-                           struct pipe_context *pipe,
-                           unsigned buffer_width,
-                           unsigned buffer_height,
-                           float scale)
+vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
+           unsigned buffer_width, unsigned buffer_height, float scale)
 {
    struct pipe_resource tex_templ, *tex_dummy;
    struct pipe_sampler_view sampler_view;
@@ -511,7 +481,7 @@ error_pipe_state:
 }
 
 void
-vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
+vl_mc_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 {
    assert(renderer);
 
@@ -525,19 +495,21 @@ vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 }
 
 bool
-vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                         struct pipe_sampler_view *source)
+vl_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
+                  struct pipe_sampler_view *source)
 {
    assert(renderer && buffer);
    assert(source);
 
+   buffer->renderer = renderer;
+
    pipe_sampler_view_reference(&buffer->source, source);
 
    return true;
 }
 
 void
-vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
+vl_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
 {
    assert(buffer);
 
@@ -545,17 +517,9 @@ vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
 }
 
 void
-vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                            struct pipe_surface *surface, struct pipe_sampler_view *ref[2],
-                            unsigned not_empty_start_instance, unsigned not_empty_num_instances,
-                            unsigned empty_start_instance, unsigned empty_num_instances,
-                            struct pipe_fence_handle **fence)
+vl_mc_set_surface(struct vl_mpeg12_mc_renderer *renderer, struct pipe_surface *surface)
 {
-   assert(renderer && buffer);
-   assert(surface && ref);
-
-   if (not_empty_num_instances == 0 && empty_num_instances == 0)
-      return;
+   assert(renderer && surface);
 
    renderer->viewport.scale[0] = surface->width;
    renderer->viewport.scale[1] = surface->height;
@@ -563,46 +527,65 @@ vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mp
    renderer->fb_state.width = surface->width;
    renderer->fb_state.height = surface->height;
    renderer->fb_state.cbufs[0] = surface;
+}
+
+void
+vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer,
+                 struct pipe_sampler_view *ref, bool first,
+                 unsigned not_empty_start_instance, unsigned not_empty_num_instances,
+                 unsigned empty_start_instance, unsigned empty_num_instances)
+{
+   struct vl_mpeg12_mc_renderer *renderer;
+
+   assert(buffer && ref);
+
+   if (not_empty_num_instances == 0 && empty_num_instances == 0)
+      return;
 
+   renderer = buffer->renderer;
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
    renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
+   renderer->pipe->bind_blend_state(renderer->pipe, first ? renderer->blend_clear : renderer->blend_add);
 
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
+   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref);
 
-   renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear);
-   if (ref[0] || ref[1]) {
-      void *samplers[2];
+   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &ref);
+   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ref);
 
-      renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref);
+   if (not_empty_num_instances > 0)
+      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
+                                 not_empty_start_instance, not_empty_num_instances);
 
-      /* if no reference frame provided use a dummy sampler instead */
-      if (!ref[0]) ref[0] = renderer->dummy;
-      if (!ref[1]) ref[1] = renderer->dummy;
+   if (empty_num_instances > 0)
+      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
+                                 empty_start_instance, empty_num_instances);
+}
 
-      renderer->pipe->set_fragment_sampler_views(renderer->pipe, 2, ref);
+void
+vl_mc_render_ycbcr(struct vl_mpeg12_mc_buffer *buffer, bool first,
+                   unsigned not_empty_start_instance, unsigned not_empty_num_instances)
+{
+   struct vl_mpeg12_mc_renderer *renderer;
 
-      samplers[0] = samplers[1] = renderer->sampler_ref;
-      renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 2, samplers);
+   assert(buffer);
 
-      if (not_empty_num_instances > 0)
-         util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                                    not_empty_start_instance, not_empty_num_instances);
+   if (not_empty_num_instances == 0)
+      return;
 
-      if (empty_num_instances > 0)
-         util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                                    empty_start_instance, empty_num_instances);
+   renderer = buffer->renderer;
+   renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
+   renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
+   renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
+   renderer->pipe->bind_blend_state(renderer->pipe, first ? renderer->blend_clear : renderer->blend_add);
 
-      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add);
-   }
+   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
+   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
 
    renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &buffer->source);
    renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr);
-   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
-
-   if (not_empty_num_instances > 0)
-      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                                 not_empty_start_instance, not_empty_num_instances);
 
-   renderer->pipe->flush(renderer->pipe, fence);
+   util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
+                              not_empty_start_instance, not_empty_num_instances);
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index f71bca5e821..3b5e61df02c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -55,26 +55,28 @@ struct vl_mpeg12_mc_renderer
 
 struct vl_mpeg12_mc_buffer
 {
+   struct vl_mpeg12_mc_renderer *renderer;
    struct pipe_sampler_view *source;
 };
 
-bool vl_mpeg12_mc_renderer_init(struct vl_mpeg12_mc_renderer *renderer,
-                                struct pipe_context *pipe,
-                                unsigned picture_width,
-                                unsigned picture_height,
-                                float scale);
+bool vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
+                unsigned picture_width, unsigned picture_height, float scale);
 
-void vl_mpeg12_mc_renderer_cleanup(struct vl_mpeg12_mc_renderer *renderer);
+void vl_mc_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
-bool vl_mpeg12_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                              struct pipe_sampler_view *source);
+bool vl_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
+                       struct pipe_sampler_view *source);
 
-void vl_mpeg12_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer);
+void vl_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer);
 
-void vl_mpeg12_mc_renderer_flush(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                                 struct pipe_surface *surface, struct pipe_sampler_view *ref[2],
-                                 unsigned not_empty_start_instance, unsigned not_empty_num_instances,
-                                 unsigned empty_start_instance, unsigned empty_num_instances,
-                                 struct pipe_fence_handle **fence);
+void vl_mc_set_surface(struct vl_mpeg12_mc_renderer *renderer, struct pipe_surface *surface);
+
+void vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer,
+                      struct pipe_sampler_view *ref, bool first,
+                      unsigned not_empty_start_instance, unsigned not_empty_num_instances,
+                      unsigned empty_start_instance, unsigned empty_num_instances);
+
+void vl_mc_render_ycbcr(struct vl_mpeg12_mc_buffer *buffer, bool first,
+                        unsigned not_empty_start_instance, unsigned not_empty_num_instances);
 
 #endif /* vl_mpeg12_mc_renderer_h */
diff --git a/src/gallium/auxiliary/vl/vl_types.h b/src/gallium/auxiliary/vl/vl_types.h
index a927e829349..27bb69d67bc 100644
--- a/src/gallium/auxiliary/vl/vl_types.h
+++ b/src/gallium/auxiliary/vl/vl_types.h
@@ -43,4 +43,9 @@ struct vertex4f
    float x, y, z, w;
 };
 
+struct vertex4s
+{
+   short x, y, z, w;
+};
+
 #endif /* vl_types_h */
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index c834042e8ae..4ec1905af20 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -33,11 +33,11 @@
 struct vl_vertex_stream
 {
    struct vertex2s pos;
-   uint8_t eb[3][2][2];
    uint8_t dct_type_field;
    uint8_t mb_type_intra;
-   uint8_t mv_wheights[2];
-   struct vertex2s mv[4];
+   uint8_t dummy[2];
+   uint8_t eb[3][2][2];
+   struct vertex4s mv[4];
 };
 
 /* vertices for a quad covering a block */
@@ -130,7 +130,7 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
 }
 
 void *
-vl_vb_get_elems_state(struct pipe_context *pipe, int component)
+vl_vb_get_elems_state(struct pipe_context *pipe, int component, int motionvector)
 {
    struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
 
@@ -140,25 +140,19 @@ vl_vb_get_elems_state(struct pipe_context *pipe, int component)
    /* Position element */
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
-   /* empty block element of selected component */
-   vertex_elems[VS_I_EB].src_offset = 4 + component * 4;
-   vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
-
    /* flags */
-   vertex_elems[VS_I_FLAGS].src_offset = 16;
    vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_UNORM;
 
-   /* motion vector 0 TOP element */
-   vertex_elems[VS_I_MV0_TOP].src_format = PIPE_FORMAT_R16G16_SSCALED;
-
-   /* motion vector 0 BOTTOM element */
-   vertex_elems[VS_I_MV0_BOTTOM].src_format = PIPE_FORMAT_R16G16_SSCALED;
+   /* empty block element of selected component */
+   vertex_elems[VS_I_EB].src_offset = 8 + component * 4;
+   vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
 
-   /* motion vector 1 TOP element */
-   vertex_elems[VS_I_MV1_TOP].src_format = PIPE_FORMAT_R16G16_SSCALED;
+   /* motion vector TOP element */
+   vertex_elems[VS_I_MV_TOP].src_offset = 20 + motionvector * 16;
+   vertex_elems[VS_I_MV_TOP].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED;
 
-   /* motion vector 1 BOTTOM element */
-   vertex_elems[VS_I_MV1_BOTTOM].src_format = PIPE_FORMAT_R16G16_SSCALED;
+   /* motion vector BOTTOM element */
+   vertex_elems[VS_I_MV_BOTTOM].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED;
 
    vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1);
 
@@ -209,33 +203,43 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 }
 
 static void
-get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex2s mv[4])
+get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex4s mv[4])
 {
    if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
       mv[0].x = mv[1].x = mb->mv[0].top.x;
       mv[0].y = mv[1].y = mb->mv[0].top.y;
+      mv[0].z = 0; mv[1].z = 1;
+
       mv[2].x = mv[3].x = mb->mv[1].top.x;
       mv[2].y = mv[3].y = mb->mv[1].top.y;
+      mv[2].z = 0; mv[3].z = 1;
 
    } else {
       mv[0].x = mb->mv[0].top.x;
       mv[0].y = mb->mv[0].top.y - (mb->mv[0].top.y % 4);
+      mv[0].z = mb->mv[0].top.field_select;
 
       mv[1].x = mb->mv[0].bottom.x;
       mv[1].y = mb->mv[0].bottom.y - (mb->mv[0].bottom.y % 4);
+      mv[1].z = mb->mv[0].bottom.field_select;
 
       if (mb->mv[0].top.field_select) mv[0].y += 2;
       if (!mb->mv[0].bottom.field_select) mv[1].y -= 2;
 
       mv[2].x = mb->mv[1].top.x;
       mv[2].y = mb->mv[1].top.y - (mb->mv[1].top.y % 4);
+      mv[2].z = mb->mv[1].top.field_select;
 
       mv[3].x = mb->mv[1].bottom.x;
       mv[3].y = mb->mv[1].bottom.y - (mb->mv[1].bottom.y % 4);
+      mv[3].z = mb->mv[1].bottom.field_select;
 
       if (mb->mv[1].top.field_select) mv[2].y += 2;
       if (!mb->mv[1].bottom.field_select) mv[3].y -= 2;
    }
+
+   mv[0].w = mv[1].w = mb->mv[0].wheight;
+   mv[2].w = mv[3].w = mb->mv[1].wheight;
 }
 
 void
@@ -265,8 +269,6 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *
    stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
    stream->mb_type_intra = !mb->dct_intra;
 
-   stream->mv_wheights[0] = mb->mv[0].wheight;
-   stream->mv_wheights[1] = mb->mv[1].wheight;
    get_motion_vectors(mb, stream->mv);
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 58b841836d0..6cbda7cc9b1 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -42,12 +42,10 @@ enum VS_INPUT
 {
    VS_I_RECT,
    VS_I_VPOS,
-   VS_I_EB,
    VS_I_FLAGS,
-   VS_I_MV0_TOP,
-   VS_I_MV0_BOTTOM,
-   VS_I_MV1_TOP,
-   VS_I_MV1_BOTTOM,
+   VS_I_EB,
+   VS_I_MV_TOP,
+   VS_I_MV_BOTTOM,
 
    NUM_VS_INPUTS
 };
@@ -66,7 +64,7 @@ struct vl_vertex_buffer
 struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe,
                                              unsigned blocks_x, unsigned blocks_y);
 
-void *vl_vb_get_elems_state(struct pipe_context *pipe, int component);
+void *vl_vb_get_elems_state(struct pipe_context *pipe, int component, int motionvector);
 
 struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer,
                                      struct pipe_context *pipe,
-- 
cgit v1.2.3


From 4fc4f7b9ea83f02a81e9a3f57db7ae655490379a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 15 Apr 2011 00:01:27 +0200
Subject: [g3dvl] set ref samplers to linear again

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index dde7846ffb2..702d7e305fd 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -330,9 +330,9 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
-   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
    sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
    sampler.compare_func = PIPE_FUNC_ALWAYS;
    sampler.normalized_coords = 1;
@@ -340,6 +340,8 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
    if (!r->sampler_ref)
       goto error_sampler_ref;
 
+   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
    r->sampler_ycbcr = r->pipe->create_sampler_state(r->pipe, &sampler);
    if (!r->sampler_ycbcr)
       goto error_sampler_ycbcr;
-- 
cgit v1.2.3


From b1c44b0ea6b3e891086ce554edf3c26dbd3708cd Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 15 Apr 2011 21:26:06 +0200
Subject: [g3dvl] give mv and ycbcr stage its own vertex shader

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 238 ++++++++++++++---------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |   3 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     |   6 +-
 3 files changed, 153 insertions(+), 94 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 702d7e305fd..65909a57b26 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -42,65 +42,80 @@ enum VS_OUTPUT
 {
    VS_O_VPOS,
    VS_O_LINE,
-   VS_O_TEX_TOP,
-   VS_O_TEX_BOTTOM,
-   VS_O_MV_TOP,
-   VS_O_MV_BOTTOM
+   VS_O_VTOP,
+   VS_O_VBOTTOM
 };
 
+static struct ureg_dst
+calc_position(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader)
+{
+   struct ureg_src block_scale;
+   struct ureg_src vrect, vpos;
+   struct ureg_dst t_vpos;
+   struct ureg_dst o_vpos;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_vpos = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   /*
+    * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
+    *
+    * t_vpos = (vpos + vrect) * block_scale
+    * o_vpos.xy = t_vpos
+    * o_vpos.zw = vpos
+    */
+   block_scale = ureg_imm2f(shader,
+      (float)MACROBLOCK_WIDTH / r->buffer_width,
+      (float)MACROBLOCK_HEIGHT / r->buffer_height);
+
+   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+
+   return t_vpos;
+}
+
 static void *
-create_vert_shader(struct vl_mpeg12_mc_renderer *r)
+create_ycbcr_vert_shader(struct vl_mpeg12_mc_renderer *r)
 {
    struct ureg_program *shader;
-   struct ureg_src block_scale, mv_scale;
-   struct ureg_src vrect, vpos, eb, flags, vmv[2];
-   struct ureg_dst t_vpos, t_vtex, t_vmv;
-   struct ureg_dst o_vpos, o_line, o_vtex[2], o_vmv[2];
-   unsigned i, label;
+   struct ureg_src block_scale;
+   struct ureg_src vrect, vpos, eb, flags;
+   struct ureg_dst t_vpos, t_vtex;
+   struct ureg_dst o_line, o_vtex[2];
+   unsigned label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
       return NULL;
 
-   t_vpos = ureg_DECL_temporary(shader);
-   t_vtex = ureg_DECL_temporary(shader);
-   t_vmv = ureg_DECL_temporary(shader);
-
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
    eb = ureg_DECL_vs_input(shader, VS_I_EB);
    flags = ureg_DECL_vs_input(shader, VS_I_FLAGS);
-   vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
-   vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
 
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   t_vpos = calc_position(r, shader);
+   t_vtex = ureg_DECL_temporary(shader);
+
    o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
-   o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_TOP);
-   o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_BOTTOM);
-   o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV_TOP);
-   o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV_BOTTOM);
+   o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
+   o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
 
    /*
     * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
-    * mv_scale = 0.5 / (dst.width, dst.height);
-    *
-    * t_vpos = (vpos + vrect) * block_scale
-    * o_vpos.xy = t_vpos
-    * o_vpos.zw = vpos
-    *
-    * o_eb[0..1] = vrect.x ? eb[0..1][1] : eb[0..1][0]
     *
-    * o_frame_pred = frame_pred
-    * o_info.x = not_intra
-    * o_info.y = ref_weight / 2
+    * o_line.x = interlaced
+    * o_line.y = vrect
     *
-    * // Apply motion vectors
-    * o_vmv[0..3] = t_vpos + vmv[0..3] * mv_scale
+    * o_vtex[0].z = vrect.x ? eb.y : eb.x
+    * o_vtex[1].z = vrect.x ? eb.w : eb.z
     *
-    * o_line.xy = vrect * 8
-    * o_line.z = interlaced
-    *
-    * if(eb[0][0].w) { //interlaced
+    * if(interlaced) {
     *    t_vtex.x = vrect.x
     *    t_vtex.y = vrect.y * 0.5
     *    t_vtex += vpos
@@ -119,21 +134,8 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
       (float)MACROBLOCK_WIDTH / r->buffer_width,
       (float)MACROBLOCK_HEIGHT / r->buffer_height);
 
-   mv_scale = ureg_imm4f(shader,
-      0.5f / r->buffer_width,
-      0.5f / r->buffer_height,
-      1.0f,
-      1.0f / 255.0f);
-
-   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
-   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
-
-   for (i = 0; i < 2; ++i) {
-      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
-      ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_W), mv_scale, vmv[i]);
-   }
+   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), flags, ureg_imm1f(shader, 0.5f));
+   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect);
 
    ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
    ureg_CMP(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_Z),
@@ -147,13 +149,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
             ureg_scalar(eb, TGSI_SWIZZLE_W),
             ureg_scalar(eb, TGSI_SWIZZLE_Z));
 
-   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), ureg_scalar(vrect, TGSI_SWIZZLE_Y));
-   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
-      vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
-   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Z),
-            ureg_scalar(flags, TGSI_SWIZZLE_Y));
-
-   ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_X), &label);
+   ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_Y), &label);
 
       ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
       ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
@@ -162,7 +158,7 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
       ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
 
-      ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X),
+      ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
          ureg_scalar(vrect, TGSI_SWIZZLE_Y),
          ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
 
@@ -171,7 +167,64 @@ create_vert_shader(struct vl_mpeg12_mc_renderer *r)
 
    ureg_release_temporary(shader, t_vtex);
    ureg_release_temporary(shader, t_vpos);
-   ureg_release_temporary(shader, t_vmv);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+static void *
+create_ref_vert_shader(struct vl_mpeg12_mc_renderer *r)
+{
+   struct ureg_program *shader;
+   struct ureg_src mv_scale;
+   struct ureg_src vrect, vmv[2];
+   struct ureg_dst t_vpos;
+   struct ureg_dst o_vpos, o_line, o_vmv[2];
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   ureg_DECL_vs_input(shader, VS_I_EB);
+   ureg_DECL_vs_input(shader, VS_I_FLAGS);
+   vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
+   vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
+
+   t_vpos = calc_position(r, shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
+   o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
+   o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
+
+   /*
+    * mv_scale = 0.5 / (dst.width, dst.height);
+    *
+    * // Apply motion vectors
+    * o_vmv[0..3] = t_vpos + vmv[0..3] * mv_scale
+    *
+    * o_line.y = vrect
+    *
+    */
+
+   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
+      vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
+
+   mv_scale = ureg_imm4f(shader,
+      0.5f / r->buffer_width,
+      0.5f / r->buffer_height,
+      1.0f,
+      1.0f / 255.0f);
+
+   for (i = 0; i < 2; ++i) {
+      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
+      ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_W), mv_scale, vmv[i]);
+   }
+
+   ureg_release_temporary(shader, t_vpos);
 
    ureg_END(shader);
 
@@ -189,17 +242,16 @@ calc_field(struct ureg_program *shader)
    line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
 
    /*
-    * line.x going from 0 to 1 if not interlaced
-    * line.x going from 0 to 8 in steps of 0.5 if interlaced
-    * line.y going from 0 to 8 in steps of 0.5
-    * line.z is flag for intra frames
+    * line.x is flag for intra frames
+    * line.y going from 0 to 1 if not interlaced
+    * line.y going from 0 to 8 in steps of 0.5 if interlaced
     *
     * tmp.xy = fraction(line)
     * tmp.xy = tmp.xy >= 0.5 ? 1 : 0
     */
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), line);
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
-   ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), line);
+   ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), line);
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
 
    return tmp;
 }
@@ -217,8 +269,8 @@ create_ycbcr_frag_shader(struct vl_mpeg12_mc_renderer *r, float scale)
    if (!shader)
       return NULL;
 
-   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_TOP, TGSI_INTERPOLATE_LINEAR);
-   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX_BOTTOM, TGSI_INTERPOLATE_LINEAR);
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
 
    sampler = ureg_DECL_sampler(shader, 0);
 
@@ -236,7 +288,7 @@ create_ycbcr_frag_shader(struct vl_mpeg12_mc_renderer *r, float scale)
     */
 
    ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X)),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
             tc[1], tc[0]);
 
    ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f));
@@ -246,15 +298,13 @@ create_ycbcr_frag_shader(struct vl_mpeg12_mc_renderer *r, float scale)
 
       ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
 
-      ureg_CMP(shader, t_tc, ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Z)),
-               ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 0.5f));
-
       if (scale != 1.0f)
          ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
-                  ureg_src(texel), ureg_imm1f(shader, scale), ureg_src(t_tc));
+                  ureg_src(texel), ureg_imm1f(shader, scale),
+                  ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X));
       else
          ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
-                  ureg_src(texel), ureg_src(t_tc));
+                  ureg_src(texel), ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X));
 
    ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
    ureg_ENDIF(shader);
@@ -277,8 +327,8 @@ create_ref_frag_shader(struct vl_mpeg12_mc_renderer *r)
    if (!shader)
       return NULL;
 
-   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV_TOP, TGSI_INTERPOLATE_LINEAR);
-   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_MV_BOTTOM, TGSI_INTERPOLATE_LINEAR);
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
 
    sampler = ureg_DECL_sampler(shader, 0);
    ref = ureg_DECL_temporary(shader);
@@ -428,17 +478,21 @@ vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
    if (!init_pipe_state(renderer))
       goto error_pipe_state;
 
-   renderer->vs = create_vert_shader(renderer);
-   if (!renderer->vs)
-      goto error_vs_shaders;
+   renderer->vs_ref = create_ref_vert_shader(renderer);
+   if (!renderer->vs_ref)
+      goto error_vs_ref;
+
+   renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer);
+   if (!renderer->vs_ycbcr)
+      goto error_vs_ycbcr;
 
    renderer->fs_ref = create_ref_frag_shader(renderer);
    if (!renderer->fs_ref)
-      goto error_fs_ref_shaders;
+      goto error_fs_ref;
 
    renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale);
    if (!renderer->fs_ycbcr)
-      goto error_fs_ycbcr_shaders;
+      goto error_fs_ycbcr;
 
    /* create a dummy sampler */
    memset(&tex_templ, 0, sizeof(tex_templ));
@@ -469,13 +523,16 @@ vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
 error_dummy:
    renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
 
-error_fs_ycbcr_shaders:
+error_fs_ycbcr:
    renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
 
-error_fs_ref_shaders:
-   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs);
+error_fs_ref:
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
+
+error_vs_ycbcr:
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
 
-error_vs_shaders:
+error_vs_ref:
    cleanup_pipe_state(renderer);
 
 error_pipe_state:
@@ -491,7 +548,8 @@ vl_mc_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 
    cleanup_pipe_state(renderer);
 
-   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs);
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
    renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
    renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
 }
@@ -550,7 +608,7 @@ vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer,
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
    renderer->pipe->bind_blend_state(renderer->pipe, first ? renderer->blend_clear : renderer->blend_add);
 
-   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
+   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ref);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref);
 
    renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &ref);
@@ -582,7 +640,7 @@ vl_mc_render_ycbcr(struct vl_mpeg12_mc_buffer *buffer, bool first,
    renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
    renderer->pipe->bind_blend_state(renderer->pipe, first ? renderer->blend_clear : renderer->blend_add);
 
-   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs);
+   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
 
    renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &buffer->source);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index 3b5e61df02c..d6561369e28 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -47,7 +47,8 @@ struct vl_mpeg12_mc_renderer
    void *rs_state;
 
    void *blend_clear, *blend_add;
-   void *vs, *fs_ref, *fs_ycbcr;
+   void *vs_ref, *vs_ycbcr;
+   void *fs_ref, *fs_ycbcr;
    void *sampler_ref, *sampler_ycbcr;
 
    struct pipe_sampler_view *dummy;
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 4ec1905af20..925ad10c6e0 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -33,8 +33,8 @@
 struct vl_vertex_stream
 {
    struct vertex2s pos;
-   uint8_t dct_type_field;
    uint8_t mb_type_intra;
+   uint8_t dct_type_field;
    uint8_t dummy[2];
    uint8_t eb[3][2][2];
    struct vertex4s mv[4];
@@ -141,7 +141,7 @@ vl_vb_get_elems_state(struct pipe_context *pipe, int component, int motionvector
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
    /* flags */
-   vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_UNORM;
+   vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
 
    /* empty block element of selected component */
    vertex_elems[VS_I_EB].src_offset = 8 + component * 4;
@@ -267,7 +267,7 @@ vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *
             stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]);
 
    stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
-   stream->mb_type_intra = !mb->dct_intra;
+   stream->mb_type_intra = mb->dct_intra;
 
    get_motion_vectors(mb, stream->mv);
 }
-- 
cgit v1.2.3


From c87b83d4b2b860bb4c9f90897232e661a2b94c07 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 15 Apr 2011 22:15:17 +0200
Subject: [g3dvl] give mc and ycbcr stage its own vertex element state

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c |  4 ++--
 src/gallium/auxiliary/vl/vl_vertex_buffers.c | 26 +++++++++++++++++++++++---
 src/gallium/auxiliary/vl/vl_vertex_buffers.h | 10 +++++++---
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 906be3775c4..9e0d40e7f5b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -715,10 +715,10 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
 
    dec->quads = vl_vb_upload_quads(dec->pipe, 2, 2);
    for (i = 0; i < VL_MAX_PLANES; ++i)
-      dec->ves_eb[i] = vl_vb_get_elems_state(dec->pipe, i, 0);
+      dec->ves_eb[i] = vl_vb_get_ves_eb(dec->pipe, i);
 
    for (i = 0; i < 2; ++i)
-      dec->ves_mv[i] = vl_vb_get_elems_state(dec->pipe, 0, i);
+      dec->ves_mv[i] = vl_vb_get_ves_mv(dec->pipe, i);
 
    dec->base.width = align(width, MACROBLOCK_WIDTH);
    dec->base.height = align(height, MACROBLOCK_HEIGHT);
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 925ad10c6e0..fc30e85a4d7 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -130,10 +130,12 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
 }
 
 void *
-vl_vb_get_elems_state(struct pipe_context *pipe, int component, int motionvector)
+vl_vb_get_ves_eb(struct pipe_context *pipe, int component)
 {
    struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
 
+   assert(pipe);
+
    memset(&vertex_elems, 0, sizeof(vertex_elems));
    vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
 
@@ -144,11 +146,29 @@ vl_vb_get_elems_state(struct pipe_context *pipe, int component, int motionvector
    vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
 
    /* empty block element of selected component */
-   vertex_elems[VS_I_EB].src_offset = 8 + component * 4;
+   vertex_elems[VS_I_EB].src_offset = offsetof(struct vl_vertex_stream, eb[component]);
    vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
 
+   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1);
+
+   return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems);
+}
+
+void *
+vl_vb_get_ves_mv(struct pipe_context *pipe, int motionvector)
+{
+   struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
+
+   memset(&vertex_elems, 0, sizeof(vertex_elems));
+   vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
+
+   assert(pipe);
+
+   /* Position element */
+   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
+
    /* motion vector TOP element */
-   vertex_elems[VS_I_MV_TOP].src_offset = 20 + motionvector * 16;
+   vertex_elems[VS_I_MV_TOP].src_offset = offsetof(struct vl_vertex_stream, mv[motionvector * 2]);
    vertex_elems[VS_I_MV_TOP].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED;
 
    /* motion vector BOTTOM element */
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 6cbda7cc9b1..62f7bf00508 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -42,10 +42,12 @@ enum VS_INPUT
 {
    VS_I_RECT,
    VS_I_VPOS,
+
    VS_I_FLAGS,
    VS_I_EB,
-   VS_I_MV_TOP,
-   VS_I_MV_BOTTOM,
+
+   VS_I_MV_TOP = VS_I_FLAGS,
+   VS_I_MV_BOTTOM = VS_I_EB,
 
    NUM_VS_INPUTS
 };
@@ -64,7 +66,9 @@ struct vl_vertex_buffer
 struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe,
                                              unsigned blocks_x, unsigned blocks_y);
 
-void *vl_vb_get_elems_state(struct pipe_context *pipe, int component, int motionvector);
+void *vl_vb_get_ves_eb(struct pipe_context *pipe, int component);
+
+void *vl_vb_get_ves_mv(struct pipe_context *pipe, int motionvector);
 
 struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer,
                                      struct pipe_context *pipe,
-- 
cgit v1.2.3


From ffcf287aa227cfd1c0f928a3310d98554caf16cd Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 16 Apr 2011 12:57:45 +0200
Subject: vdpau: implement VDPAU_DUMP option

---
 src/gallium/state_trackers/vdpau/presentation.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index fdd0144fe22..063c63fb4ee 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -25,6 +25,8 @@
  *
  **************************************************************************/
 
+#include <stdio.h>
+
 #include "vdpau_private.h"
 #include <vdpau/vdpau.h>
 #include <util/u_debug.h>
@@ -131,6 +133,8 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
                               uint32_t clip_height,
                               VdpTime  earliest_presentation_time)
 {
+   static int dump_window = -1;
+
    vlVdpPresentationQueue *pq;
    vlVdpOutputSurface *surf;
    struct pipe_surface *drawable_surface;
@@ -160,6 +164,19 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
       vl_contextprivate_get(pq->device->context, drawable_surface)
    );
 
+   if(dump_window == -1) {
+      dump_window = debug_get_num_option("VDPAU_DUMP", 0);
+   }
+
+   if(dump_window) {
+      static unsigned int framenum = 0;
+      char cmd[256];
+
+      sprintf(cmd, "xwd -id %d -out vdpau_frame_%08d.xwd", (int)pq->drawable, ++framenum);
+      if (system(cmd) != 0)
+         _debug_printf("[XvMC] Dumping surface %d failed.\n", surface);
+   }
+
    return VDP_STATUS_OK;
 }
 
-- 
cgit v1.2.3


From ff210aea7c080600bd45eb18b29a6109468ed4df Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 16 Apr 2011 13:04:04 +0200
Subject: [g3dvl] back to separate mc for y and c planes

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 27 ++++++++++++++++++---------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |  2 +-
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 9e0d40e7f5b..9e89730a23d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -279,11 +279,14 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    dec->pipe->delete_blend_state(dec->pipe, dec->blend);
    dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa);
 
-   vl_mc_cleanup(&dec->mc);
+   vl_mc_cleanup(&dec->mc_y);
+   vl_mc_cleanup(&dec->mc_c);
+
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
       vl_idct_cleanup(&dec->idct_y);
       vl_idct_cleanup(&dec->idct_c);
    }
+
    for (i = 0; i < VL_MAX_PLANES; ++i)
       dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_eb[i]);
 
@@ -414,13 +417,13 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    if (!mc_source_sv)
       goto error_mc_source_sv;
 
-   if(!vl_mc_init_buffer(&dec->mc, &buffer->mc[0], mc_source_sv[0]))
+   if(!vl_mc_init_buffer(&dec->mc_y, &buffer->mc[0], mc_source_sv[0]))
       goto error_mc_y;
 
-   if(!vl_mc_init_buffer(&dec->mc, &buffer->mc[1], mc_source_sv[1]))
+   if(!vl_mc_init_buffer(&dec->mc_c, &buffer->mc[1], mc_source_sv[1]))
       goto error_mc_cb;
 
-   if(!vl_mc_init_buffer(&dec->mc, &buffer->mc[2], mc_source_sv[2]))
+   if(!vl_mc_init_buffer(&dec->mc_c, &buffer->mc[2], mc_source_sv[2]))
       goto error_mc_cr;
 
    return &buffer->base;
@@ -479,7 +482,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    for (i = 0; i < VL_MAX_PLANES; ++i) {
       bool first = true;
 
-      vl_mc_set_surface(&dec->mc, surfaces[i]);
+      vl_mc_set_surface(i == 0 ? &dec->mc_y : &dec->mc_c, surfaces[i]);
 
       for (j = 0; j < 2; ++j) {
          if (sv[j] == NULL) continue;
@@ -756,8 +759,11 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
       }
    }
 
-   if (!vl_mc_init(&dec->mc, dec->pipe, dec->base.width, dec->base.height, mc_scale))
-      goto error_mc;
+   if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, mc_scale))
+      goto error_mc_y;
+
+   if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, mc_scale))
+      goto error_mc_c;
 
    if (!init_pipe_state(dec))
       goto error_pipe_state;
@@ -765,9 +771,12 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    return &dec->base;
 
 error_pipe_state:
-   vl_mc_cleanup(&dec->mc);
+   vl_mc_cleanup(&dec->mc_c);
+
+error_mc_c:
+   vl_mc_cleanup(&dec->mc_y);
 
-error_mc:
+error_mc_y:
    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
       vl_idct_cleanup(&dec->idct_y);
       vl_idct_cleanup(&dec->idct_c);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index c27197f6664..6f93c5a01ab 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -56,7 +56,7 @@ struct vl_mpeg12_decoder
    void *ves_mv[2];
 
    struct vl_idct idct_y, idct_c;
-   struct vl_mpeg12_mc_renderer mc;
+   struct vl_mpeg12_mc_renderer mc_y, mc_c;
 
    void *dsa;
    void *blend;
-- 
cgit v1.2.3


From 5294ac62236bf05e1eaaca3399e539c28c0ccc4c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 16 Apr 2011 13:40:19 +0200
Subject: [g3dvl] move mc fb and viewport handling into buffer object

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c     |  9 +--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 74 +++++++++++++++---------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h | 16 ++---
 3 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 9e89730a23d..64127cf2d69 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -480,16 +480,13 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    dec->pipe->set_vertex_buffers(dec->pipe, 2, buf->vertex_bufs.all);
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      bool first = true;
-
-      vl_mc_set_surface(i == 0 ? &dec->mc_y : &dec->mc_c, surfaces[i]);
+      vl_mc_set_surface(&buf->mc[i], surfaces[i]);
 
       for (j = 0; j < 2; ++j) {
          if (sv[j] == NULL) continue;
 
          dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv[j]);
-         vl_mc_render_ref(&buf->mc[i], sv[j][i], first, ne_start, ne_num, e_start, e_num);
-         first = false;
+         vl_mc_render_ref(&buf->mc[i], sv[j][i], ne_start, ne_num, e_start, e_num);
       }
 
       dec->pipe->bind_blend_state(dec->pipe, dec->blend);
@@ -498,7 +495,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
          vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], ne_num);
 
-      vl_mc_render_ycbcr(&buf->mc[i], first, ne_start, ne_num);
+      vl_mc_render_ycbcr(&buf->mc[i], ne_start, ne_num);
 
    }
    dec->pipe->flush(dec->pipe, fence);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 65909a57b26..8102aed838e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -366,16 +366,6 @@ init_pipe_state(struct vl_mpeg12_mc_renderer *r)
 
    assert(r);
 
-   r->viewport.scale[2] = 1;
-   r->viewport.scale[3] = 1;
-   r->viewport.translate[0] = 0;
-   r->viewport.translate[1] = 0;
-   r->viewport.translate[2] = 0;
-   r->viewport.translate[3] = 0;
-
-   r->fb_state.nr_cbufs = 1;
-   r->fb_state.zsbuf = NULL;
-
    memset(&sampler, 0, sizeof(sampler));
    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
@@ -563,6 +553,16 @@ vl_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_bu
 
    buffer->renderer = renderer;
 
+   buffer->viewport.scale[2] = 1;
+   buffer->viewport.scale[3] = 1;
+   buffer->viewport.translate[0] = 0;
+   buffer->viewport.translate[1] = 0;
+   buffer->viewport.translate[2] = 0;
+   buffer->viewport.translate[3] = 0;
+
+   buffer->fb_state.nr_cbufs = 1;
+   buffer->fb_state.zsbuf = NULL;
+
    pipe_sampler_view_reference(&buffer->source, source);
 
    return true;
@@ -577,21 +577,43 @@ vl_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
 }
 
 void
-vl_mc_set_surface(struct vl_mpeg12_mc_renderer *renderer, struct pipe_surface *surface)
+vl_mc_set_surface(struct vl_mpeg12_mc_buffer *buffer, struct pipe_surface *surface)
+{
+   assert(buffer && surface);
+
+   buffer->surface_cleared = false;
+
+   buffer->viewport.scale[0] = surface->width;
+   buffer->viewport.scale[1] = surface->height;
+
+   buffer->fb_state.width = surface->width;
+   buffer->fb_state.height = surface->height;
+   buffer->fb_state.cbufs[0] = surface;
+}
+
+static void
+prepare_pipe_4_rendering(struct vl_mpeg12_mc_buffer *buffer)
 {
-   assert(renderer && surface);
+   struct vl_mpeg12_mc_renderer *renderer;
+
+   assert(buffer);
 
-   renderer->viewport.scale[0] = surface->width;
-   renderer->viewport.scale[1] = surface->height;
+   renderer = buffer->renderer;
+   renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
 
-   renderer->fb_state.width = surface->width;
-   renderer->fb_state.height = surface->height;
-   renderer->fb_state.cbufs[0] = surface;
+   if (buffer->surface_cleared)
+      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add);
+   else {
+      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear);
+      buffer->surface_cleared = true;
+   }
+
+   renderer->pipe->set_framebuffer_state(renderer->pipe, &buffer->fb_state);
+   renderer->pipe->set_viewport_state(renderer->pipe, &buffer->viewport);
 }
 
 void
-vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer,
-                 struct pipe_sampler_view *ref, bool first,
+vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer, struct pipe_sampler_view *ref,
                  unsigned not_empty_start_instance, unsigned not_empty_num_instances,
                  unsigned empty_start_instance, unsigned empty_num_instances)
 {
@@ -602,11 +624,9 @@ vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer,
    if (not_empty_num_instances == 0 && empty_num_instances == 0)
       return;
 
+   prepare_pipe_4_rendering(buffer);
+
    renderer = buffer->renderer;
-   renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
-   renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
-   renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
-   renderer->pipe->bind_blend_state(renderer->pipe, first ? renderer->blend_clear : renderer->blend_add);
 
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ref);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref);
@@ -624,7 +644,7 @@ vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer,
 }
 
 void
-vl_mc_render_ycbcr(struct vl_mpeg12_mc_buffer *buffer, bool first,
+vl_mc_render_ycbcr(struct vl_mpeg12_mc_buffer *buffer,
                    unsigned not_empty_start_instance, unsigned not_empty_num_instances)
 {
    struct vl_mpeg12_mc_renderer *renderer;
@@ -634,11 +654,9 @@ vl_mc_render_ycbcr(struct vl_mpeg12_mc_buffer *buffer, bool first,
    if (not_empty_num_instances == 0)
       return;
 
+   prepare_pipe_4_rendering(buffer);
+
    renderer = buffer->renderer;
-   renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
-   renderer->pipe->set_framebuffer_state(renderer->pipe, &renderer->fb_state);
-   renderer->pipe->set_viewport_state(renderer->pipe, &renderer->viewport);
-   renderer->pipe->bind_blend_state(renderer->pipe, first ? renderer->blend_clear : renderer->blend_add);
 
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index d6561369e28..f67f97a71fc 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -41,9 +41,6 @@ struct vl_mpeg12_mc_renderer
    unsigned buffer_width;
    unsigned buffer_height;
 
-   struct pipe_viewport_state viewport;
-   struct pipe_framebuffer_state fb_state;
-
    void *rs_state;
 
    void *blend_clear, *blend_add;
@@ -57,6 +54,12 @@ struct vl_mpeg12_mc_renderer
 struct vl_mpeg12_mc_buffer
 {
    struct vl_mpeg12_mc_renderer *renderer;
+
+   bool surface_cleared;
+
+   struct pipe_viewport_state viewport;
+   struct pipe_framebuffer_state fb_state;
+
    struct pipe_sampler_view *source;
 };
 
@@ -70,14 +73,13 @@ bool vl_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_
 
 void vl_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer);
 
-void vl_mc_set_surface(struct vl_mpeg12_mc_renderer *renderer, struct pipe_surface *surface);
+void vl_mc_set_surface(struct vl_mpeg12_mc_buffer *buffer, struct pipe_surface *surface);
 
-void vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer,
-                      struct pipe_sampler_view *ref, bool first,
+void vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer, struct pipe_sampler_view *ref,
                       unsigned not_empty_start_instance, unsigned not_empty_num_instances,
                       unsigned empty_start_instance, unsigned empty_num_instances);
 
-void vl_mc_render_ycbcr(struct vl_mpeg12_mc_buffer *buffer, bool first,
+void vl_mc_render_ycbcr(struct vl_mpeg12_mc_buffer *buffer,
                         unsigned not_empty_start_instance, unsigned not_empty_num_instances);
 
 #endif /* vl_mpeg12_mc_renderer_h */
-- 
cgit v1.2.3


From cfe921a9b659ae94248054c4f1330f863d214d5b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 16 Apr 2011 16:02:40 +0200
Subject: [g3dvl] remove dummy sampler from mc

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 32 ------------------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  2 --
 2 files changed, 34 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 8102aed838e..afec37e28d9 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -453,9 +453,6 @@ bool
 vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
            unsigned buffer_width, unsigned buffer_height, float scale)
 {
-   struct pipe_resource tex_templ, *tex_dummy;
-   struct pipe_sampler_view sampler_view;
-
    assert(renderer);
    assert(pipe);
 
@@ -484,35 +481,8 @@ vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
    if (!renderer->fs_ycbcr)
       goto error_fs_ycbcr;
 
-   /* create a dummy sampler */
-   memset(&tex_templ, 0, sizeof(tex_templ));
-   tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
-   tex_templ.flags = 0;
-
-   tex_templ.target = PIPE_TEXTURE_2D;
-   tex_templ.format = PIPE_FORMAT_R8_SNORM;
-   tex_templ.width0 = 1;
-   tex_templ.height0 = 1;
-   tex_templ.depth0 = 1;
-   tex_templ.array_size = 1;
-   tex_templ.last_level = 0;
-   tex_templ.usage = PIPE_USAGE_STATIC;
-   tex_dummy = pipe->screen->resource_create(pipe->screen, &tex_templ);
-   if (!tex_dummy)
-      goto error_dummy;
-
-   memset(&sampler_view, 0, sizeof(sampler_view));
-   u_sampler_view_default_template(&sampler_view, tex_dummy, tex_dummy->format);
-   renderer->dummy = pipe->create_sampler_view(pipe, tex_dummy, &sampler_view);
-   pipe_resource_reference(&tex_dummy, NULL);
-   if (!renderer->dummy)
-      goto error_dummy;
-
    return true;
 
-error_dummy:
-   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
-
 error_fs_ycbcr:
    renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
 
@@ -534,8 +504,6 @@ vl_mc_cleanup(struct vl_mpeg12_mc_renderer *renderer)
 {
    assert(renderer);
 
-   pipe_sampler_view_reference(&renderer->dummy, NULL);
-
    cleanup_pipe_state(renderer);
 
    renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index f67f97a71fc..b5dd0139b02 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -47,8 +47,6 @@ struct vl_mpeg12_mc_renderer
    void *vs_ref, *vs_ycbcr;
    void *fs_ref, *fs_ycbcr;
    void *sampler_ref, *sampler_ycbcr;
-
-   struct pipe_sampler_view *dummy;
 };
 
 struct vl_mpeg12_mc_buffer
-- 
cgit v1.2.3


From f1485e155a985df3100708f4bfb1a9d7c72217f4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 16 Apr 2011 16:22:53 +0200
Subject: [g3dvl] make macroblock_size configurable in mc

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c     |  5 ++--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 38 +++++++++++++-----------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  4 ++-
 3 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 64127cf2d69..08b740c6199 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -756,10 +756,11 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
       }
    }
 
-   if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, mc_scale))
+   if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, mc_scale))
       goto error_mc_y;
 
-   if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, mc_scale))
+   // TODO
+   if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, mc_scale))
       goto error_mc_c;
 
    if (!init_pipe_state(dec))
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index afec37e28d9..b87bd6b14c9 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -149,21 +149,23 @@ create_ycbcr_vert_shader(struct vl_mpeg12_mc_renderer *r)
             ureg_scalar(eb, TGSI_SWIZZLE_W),
             ureg_scalar(eb, TGSI_SWIZZLE_Z));
 
-   ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_Y), &label);
-
-      ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
-      ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
-      ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
-      ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
-      ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
-      ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
-
-      ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
-         ureg_scalar(vrect, TGSI_SWIZZLE_Y),
-         ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
-
-   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-   ureg_ENDIF(shader);
+   if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO
+      ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_Y), &label);
+
+         ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
+         ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
+         ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
+         ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
+         ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
+         ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
+
+         ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
+            ureg_scalar(vrect, TGSI_SWIZZLE_Y),
+            ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
+
+      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+      ureg_ENDIF(shader);
+   }
 
    ureg_release_temporary(shader, t_vtex);
    ureg_release_temporary(shader, t_vpos);
@@ -211,7 +213,7 @@ create_ref_vert_shader(struct vl_mpeg12_mc_renderer *r)
     */
 
    ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
-      vrect, ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
+      vrect, ureg_imm1f(shader, r->macroblock_size / 2));
 
    mv_scale = ureg_imm4f(shader,
       0.5f / r->buffer_width,
@@ -451,7 +453,8 @@ cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
 
 bool
 vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
-           unsigned buffer_width, unsigned buffer_height, float scale)
+           unsigned buffer_width, unsigned buffer_height,
+           unsigned macroblock_size, float scale)
 {
    assert(renderer);
    assert(pipe);
@@ -461,6 +464,7 @@ vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
    renderer->pipe = pipe;
    renderer->buffer_width = buffer_width;
    renderer->buffer_height = buffer_height;
+   renderer->macroblock_size = macroblock_size;
 
    if (!init_pipe_state(renderer))
       goto error_pipe_state;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
index b5dd0139b02..4137ac407d2 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
@@ -40,6 +40,7 @@ struct vl_mpeg12_mc_renderer
    struct pipe_context *pipe;
    unsigned buffer_width;
    unsigned buffer_height;
+   unsigned macroblock_size;
 
    void *rs_state;
 
@@ -62,7 +63,8 @@ struct vl_mpeg12_mc_buffer
 };
 
 bool vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
-                unsigned picture_width, unsigned picture_height, float scale);
+                unsigned picture_width, unsigned picture_height,
+                unsigned macroblock_size, float scale);
 
 void vl_mc_cleanup(struct vl_mpeg12_mc_renderer *renderer);
 
-- 
cgit v1.2.3


From 9c8bb28ca128dca9f279c78857da1b39223e30f7 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 17 Apr 2011 12:04:18 +0200
Subject: [g3dvl] move top/bottom field selection into mc code

Removes the workaround and gets interlaced videos to work 100% correctly.
---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 45 +++++++++++++++++++-----
 src/gallium/auxiliary/vl/vl_vertex_buffers.c     | 26 ++++++--------
 2 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index b87bd6b14c9..eae546aefcc 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -218,12 +218,12 @@ create_ref_vert_shader(struct vl_mpeg12_mc_renderer *r)
    mv_scale = ureg_imm4f(shader,
       0.5f / r->buffer_width,
       0.5f / r->buffer_height,
-      1.0f,
+      1.0f / 4.0f,
       1.0f / 255.0f);
 
    for (i = 0; i < 2; ++i) {
       ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
-      ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_W), mv_scale, vmv[i]);
+      ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), mv_scale, vmv[i]);
    }
 
    ureg_release_temporary(shader, t_vpos);
@@ -320,10 +320,15 @@ create_ycbcr_frag_shader(struct vl_mpeg12_mc_renderer *r, float scale)
 static void *
 create_ref_frag_shader(struct vl_mpeg12_mc_renderer *r)
 {
+   const float y_scale =
+      r->buffer_height / 2 *
+      r->macroblock_size / MACROBLOCK_HEIGHT;
+
    struct ureg_program *shader;
    struct ureg_src tc[2], sampler;
    struct ureg_dst ref, field;
    struct ureg_dst fragment;
+   unsigned label;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -340,15 +345,37 @@ create_ref_frag_shader(struct vl_mpeg12_mc_renderer *r)
    field = calc_field(shader);
 
    /*
-    * if (field.z)
-    *    ref[0..1] = tex(tc[0..1], sampler[0..1])
-    * else
-    *    ref[0..1] = tex(tc[2..3], sampler[0..1])
-    * result = LRP(info.y, ref[0..1])
+    * ref = field.z ? tc[1] : tc[0]
+    *
+    * // Adjust tc acording to top/bottom field selection
+    * if (|ref.z|) {
+    *    ref.y *= y_scale
+    *    ref.y = floor(ref.y)
+    *    ref.y += ref.z
+    *    ref.y /= y_scale
+    * }
+    * fragment.xyz = tex(ref, sampler[0])
     */
-   ureg_CMP(shader, ref, ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)), tc[1], tc[0]);
+   ureg_CMP(shader, ureg_writemask(ref, TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1], tc[0]);
+   ureg_CMP(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1], tc[0]);
+
+   ureg_IF(shader, ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z), &label);
+
+      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
+               ureg_src(ref), ureg_imm1f(shader, y_scale));
+      ureg_FLR(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref));
+      ureg_ADD(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
+               ureg_src(ref), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z));
+      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
+               ureg_src(ref), ureg_imm1f(shader, 1.0f / y_scale));
+
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ENDIF(shader);
 
-   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(ref));
    ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler);
 
    ureg_release_temporary(shader, ref);
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index fc30e85a4d7..84dfc9eccf6 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -228,34 +228,28 @@ get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex4s mv[4])
    if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
       mv[0].x = mv[1].x = mb->mv[0].top.x;
       mv[0].y = mv[1].y = mb->mv[0].top.y;
-      mv[0].z = 0; mv[1].z = 1;
+      mv[0].z = mv[1].z = 0;
 
       mv[2].x = mv[3].x = mb->mv[1].top.x;
       mv[2].y = mv[3].y = mb->mv[1].top.y;
-      mv[2].z = 0; mv[3].z = 1;
+      mv[2].z = mv[3].z = 0;
 
    } else {
       mv[0].x = mb->mv[0].top.x;
-      mv[0].y = mb->mv[0].top.y - (mb->mv[0].top.y % 4);
-      mv[0].z = mb->mv[0].top.field_select;
+      mv[0].y = mb->mv[0].top.y;
+      mv[0].z = mb->mv[0].top.field_select ? 3 : 1;
 
       mv[1].x = mb->mv[0].bottom.x;
-      mv[1].y = mb->mv[0].bottom.y - (mb->mv[0].bottom.y % 4);
-      mv[1].z = mb->mv[0].bottom.field_select;
-
-      if (mb->mv[0].top.field_select) mv[0].y += 2;
-      if (!mb->mv[0].bottom.field_select) mv[1].y -= 2;
+      mv[1].y = mb->mv[0].bottom.y;
+      mv[1].z = mb->mv[0].bottom.field_select ? 3 : 1;
 
       mv[2].x = mb->mv[1].top.x;
-      mv[2].y = mb->mv[1].top.y - (mb->mv[1].top.y % 4);
-      mv[2].z = mb->mv[1].top.field_select;
+      mv[2].y = mb->mv[1].top.y;
+      mv[2].z = mb->mv[1].top.field_select ? 3 : 1;
 
       mv[3].x = mb->mv[1].bottom.x;
-      mv[3].y = mb->mv[1].bottom.y - (mb->mv[1].bottom.y % 4);
-      mv[3].z = mb->mv[1].bottom.field_select;
-
-      if (mb->mv[1].top.field_select) mv[2].y += 2;
-      if (!mb->mv[1].bottom.field_select) mv[3].y -= 2;
+      mv[3].y = mb->mv[1].bottom.y;
+      mv[3].z = mb->mv[1].bottom.field_select ? 3 : 1;
    }
 
    mv[0].w = mv[1].w = mb->mv[0].wheight;
-- 
cgit v1.2.3


From ca79aeb91e914ac1a4774d51ca49911406377407 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 17 Apr 2011 12:15:14 +0200
Subject: [g3dvl] rename vl_mpeg12_mc_renderer into vl_mc

It's still not 100% free from mpeg12 specific stuff,
but should now be a good start for other codecs.
---
 src/gallium/auxiliary/Makefile                   |   2 +-
 src/gallium/auxiliary/vl/vl_mc.c                 | 668 +++++++++++++++++++++++
 src/gallium/auxiliary/vl/vl_mc.h                 |  85 +++
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h     |   6 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 668 -----------------------
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h |  85 ---
 6 files changed, 757 insertions(+), 757 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_mc.c
 create mode 100644 src/gallium/auxiliary/vl/vl_mc.h
 delete mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
 delete mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 425ae78138b..d210a25510b 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -149,11 +149,11 @@ C_SOURCES = \
 	util/u_vbuf_mgr.c \
 	vl/vl_context.c \
 	vl/vl_bitstream_parser.c \
-	vl/vl_mpeg12_mc_renderer.c \
 	vl/vl_mpeg12_decoder.c \
 	vl/vl_compositor.c \
 	vl/vl_csc.c \
         vl/vl_idct.c \
+	vl/vl_mc.c \
         vl/vl_vertex_buffers.c \
         vl/vl_video_buffer.c
 
diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
new file mode 100644
index 00000000000..707a4a27077
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -0,0 +1,668 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+
+#include <pipe/p_context.h>
+
+#include <util/u_sampler.h>
+#include <util/u_draw.h>
+
+#include <tgsi/tgsi_ureg.h>
+
+#include "vl_defines.h"
+#include "vl_vertex_buffers.h"
+#include "vl_mc.h"
+
+enum VS_OUTPUT
+{
+   VS_O_VPOS,
+   VS_O_LINE,
+   VS_O_VTOP,
+   VS_O_VBOTTOM
+};
+
+static struct ureg_dst
+calc_position(struct vl_mc *r, struct ureg_program *shader)
+{
+   struct ureg_src block_scale;
+   struct ureg_src vrect, vpos;
+   struct ureg_dst t_vpos;
+   struct ureg_dst o_vpos;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_vpos = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   /*
+    * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
+    *
+    * t_vpos = (vpos + vrect) * block_scale
+    * o_vpos.xy = t_vpos
+    * o_vpos.zw = vpos
+    */
+   block_scale = ureg_imm2f(shader,
+      (float)MACROBLOCK_WIDTH / r->buffer_width,
+      (float)MACROBLOCK_HEIGHT / r->buffer_height);
+
+   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+
+   return t_vpos;
+}
+
+static void *
+create_ycbcr_vert_shader(struct vl_mc *r)
+{
+   struct ureg_program *shader;
+   struct ureg_src block_scale;
+   struct ureg_src vrect, vpos, eb, flags;
+   struct ureg_dst t_vpos, t_vtex;
+   struct ureg_dst o_line, o_vtex[2];
+   unsigned label;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+   eb = ureg_DECL_vs_input(shader, VS_I_EB);
+   flags = ureg_DECL_vs_input(shader, VS_I_FLAGS);
+
+   t_vpos = calc_position(r, shader);
+   t_vtex = ureg_DECL_temporary(shader);
+
+   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
+   o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
+   o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
+
+   /*
+    * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
+    *
+    * o_line.x = flags.x * 0.5
+    * o_line.y = vrect.y
+    *
+    * o_vtex[0].z = vrect.x ? eb.y : eb.x
+    * o_vtex[1].z = vrect.x ? eb.w : eb.z
+    * o_vtex[0..1].xy = t_vpos
+    *
+    * // flags.y marks an interlaced block; this branch is only emitted
+    * // when macroblock_size == MACROBLOCK_HEIGHT
+    * if (flags.y) {
+    *    t_vtex.x = vrect.x
+    *    t_vtex.y = vrect.y * 0.5
+    *    t_vtex += vpos
+    *
+    *    o_vtex[0].xy = t_vtex * block_scale
+    *    t_vtex.y += 0.5
+    *    o_vtex[1].xy = t_vtex * block_scale
+    *
+    *    o_line.y = vrect.y * (MACROBLOCK_HEIGHT / 2)
+    * }
+    */
+   block_scale = ureg_imm2f(shader,
+      (float)MACROBLOCK_WIDTH / r->buffer_width,
+      (float)MACROBLOCK_HEIGHT / r->buffer_height);
+
+   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), flags, ureg_imm1f(shader, 0.5f));
+   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect);
+
+   ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_CMP(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_Z),
+            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
+            ureg_scalar(eb, TGSI_SWIZZLE_Y),
+            ureg_scalar(eb, TGSI_SWIZZLE_X));
+
+   ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_CMP(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_Z),
+            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
+            ureg_scalar(eb, TGSI_SWIZZLE_W),
+            ureg_scalar(eb, TGSI_SWIZZLE_Z));
+
+   if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO
+      ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_Y), &label);
+
+         ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
+         ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
+         ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
+         ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
+         ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
+         ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
+
+         ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
+            ureg_scalar(vrect, TGSI_SWIZZLE_Y),
+            ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
+
+      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+      ureg_ENDIF(shader);
+   }
+
+   ureg_release_temporary(shader, t_vtex);
+   ureg_release_temporary(shader, t_vpos);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+static void *
+create_ref_vert_shader(struct vl_mc *r)
+{
+   struct ureg_program *shader;
+   struct ureg_src mv_scale;
+   struct ureg_src vrect, vmv[2];
+   struct ureg_dst t_vpos;
+   struct ureg_dst o_vpos, o_line, o_vmv[2];
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   ureg_DECL_vs_input(shader, VS_I_EB);
+   ureg_DECL_vs_input(shader, VS_I_FLAGS);
+   vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
+   vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
+
+   t_vpos = calc_position(r, shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
+   o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
+   o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
+
+   /*
+    * mv_scale.xy = 0.5 / (dst.width, dst.height);
+    * mv_scale.zw = (1 / 4, 1 / 255)
+    *
+    * // Apply motion vectors
+    * o_vmv[0..1].xy = t_vpos + vmv[0..1] * mv_scale
+    * o_vmv[0..1].zw = vmv[0..1] * mv_scale
+    * o_line.y = vrect.y * (macroblock_size / 2)
+    */
+
+   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
+      vrect, ureg_imm1f(shader, r->macroblock_size / 2));
+
+   mv_scale = ureg_imm4f(shader,
+      0.5f / r->buffer_width,
+      0.5f / r->buffer_height,
+      1.0f / 4.0f,
+      1.0f / 255.0f);
+
+   for (i = 0; i < 2; ++i) {
+      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
+      ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), mv_scale, vmv[i]);
+   }
+
+   ureg_release_temporary(shader, t_vpos);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+static struct ureg_dst
+calc_field(struct ureg_program *shader)
+{
+   struct ureg_dst tmp;
+   struct ureg_src line;
+
+   tmp = ureg_DECL_temporary(shader);
+
+   line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
+
+   /*
+    * line.x is flag for intra frames
+    * line.y going from 0 to 1 if not interlaced
+    * line.y going from 0 to 8 in steps of 0.5 if interlaced
+    *
+    * tmp.x = line.x
+    * tmp.y = fraction(line.y) >= 0.5 ? 1 : 0
+    */
+   ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), line);
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
+
+   return tmp; /* still allocated here; releasing the temporary is left to the caller */
+}
+
+/* Fragment shader of the ycbcr pass: outputs tex(t_tc) * scale + field.x, alpha 1. */
+static void *
+create_ycbcr_frag_shader(struct vl_mc *r, float scale)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc[2], sampler;
+   struct ureg_dst texel, t_tc, field;
+   struct ureg_dst fragment;
+   unsigned label;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 0);
+
+   t_tc = ureg_DECL_temporary(shader);
+   texel = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+   field = calc_field(shader);
+
+   /*
+    * t_tc.xyz = field.y ? tc[1] : tc[0]
+    * t_tc.z = t_tc.z < 0.5 ? 1 : 0
+    * fragment = (0, 0, 0, 1)
+    * if (t_tc.z)
+    *    fragment.xyz = tex(t_tc, sampler[0]) * scale + field.x
+    */
+   ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1], tc[0]);
+   ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f));
+
+   ureg_MOV(shader, fragment, ureg_imm4f(shader, 0.0f, 0.0f, 0.0f, 1.0f));
+   ureg_IF(shader, ureg_scalar(ureg_src(t_tc), TGSI_SWIZZLE_Z), &label);
+
+      ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
+
+      if (scale != 1.0f)
+         ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+                  ureg_src(texel), ureg_imm1f(shader, scale),
+                  ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X));
+      else
+         ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+                  ureg_src(texel), ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X));
+
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ENDIF(shader);
+
+   ureg_release_temporary(shader, t_tc);
+   ureg_release_temporary(shader, texel);
+   ureg_release_temporary(shader, field);
+
+   ureg_END(shader); /* terminate the program, as the other three shader builders do */
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+static void *
+create_ref_frag_shader(struct vl_mc *r)
+{
+   const float y_scale =
+      r->buffer_height / 2 *
+      r->macroblock_size / MACROBLOCK_HEIGHT;
+
+   struct ureg_program *shader;
+   struct ureg_src tc[2], sampler;
+   struct ureg_dst ref, field;
+   struct ureg_dst fragment;
+   unsigned label;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
+   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
+
+   sampler = ureg_DECL_sampler(shader, 0);
+   ref = ureg_DECL_temporary(shader);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   field = calc_field(shader);
+
+   /*
+    * ref.xyz = field.y ? tc[1] : tc[0]
+    * fragment.w = field.y ? tc[1].w : tc[0].w
+    * // Adjust tc according to top/bottom field selection
+    * if (ref.z) {
+    *    ref.y *= y_scale
+    *    ref.y = floor(ref.y)
+    *    ref.y += ref.z
+    *    ref.y /= y_scale
+    * }
+    * fragment.xyz = tex(ref, sampler[0])
+    */
+   ureg_CMP(shader, ureg_writemask(ref, TGSI_WRITEMASK_XYZ),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1], tc[0]);
+   ureg_CMP(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W),
+            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
+            tc[1], tc[0]);
+
+   ureg_IF(shader, ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z), &label);
+
+      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
+               ureg_src(ref), ureg_imm1f(shader, y_scale));
+      ureg_FLR(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref));
+      ureg_ADD(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
+               ureg_src(ref), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z));
+      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
+               ureg_src(ref), ureg_imm1f(shader, 1.0f / y_scale));
+
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ENDIF(shader);
+
+   ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler);
+
+   ureg_release_temporary(shader, ref);
+
+   ureg_release_temporary(shader, field);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+/* Create both samplers, both blend states and the rasterizer state; false on failure. */
+static bool
+init_pipe_state(struct vl_mc *r)
+{
+   struct pipe_sampler_state sampler;
+   struct pipe_blend_state blend;
+   struct pipe_rasterizer_state rs_state;
+
+   assert(r);
+   memset(&sampler, 0, sizeof(sampler));
+   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+   sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+   sampler.compare_func = PIPE_FUNC_ALWAYS;
+   sampler.normalized_coords = 1;
+   r->sampler_ref = r->pipe->create_sampler_state(r->pipe, &sampler);
+   if (!r->sampler_ref)
+      goto error_sampler_ref;
+
+   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   r->sampler_ycbcr = r->pipe->create_sampler_state(r->pipe, &sampler);
+   if (!r->sampler_ycbcr)
+      goto error_sampler_ycbcr;
+
+   memset(&blend, 0, sizeof blend);
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 1;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   r->blend_clear = r->pipe->create_blend_state(r->pipe, &blend);
+   if (!r->blend_clear)
+      goto error_blend_clear;
+
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   r->blend_add = r->pipe->create_blend_state(r->pipe, &blend);
+   if (!r->blend_add)
+      goto error_blend_add;
+
+   memset(&rs_state, 0, sizeof(rs_state));
+   /*rs_state.sprite_coord_enable */
+   rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
+   rs_state.point_quad_rasterization = true;
+   rs_state.point_size = BLOCK_WIDTH;
+   rs_state.gl_rasterization_rules = true;
+   r->rs_state = r->pipe->create_rasterizer_state(r->pipe, &rs_state);
+   if (!r->rs_state)
+      goto error_rs_state;
+
+   return true;
+   /* Unwind in reverse creation order: each label frees what existed before the failed step. */
+error_rs_state:
+   r->pipe->delete_blend_state(r->pipe, r->blend_add);
+
+error_blend_add:
+   r->pipe->delete_blend_state(r->pipe, r->blend_clear);
+
+error_blend_clear:
+   r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr);
+
+error_sampler_ycbcr:
+   r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
+
+error_sampler_ref:
+   return false;
+}
+
+static void
+cleanup_pipe_state(struct vl_mc *r)
+{
+   assert(r);
+
+   r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
+   r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr);
+   r->pipe->delete_blend_state(r->pipe, r->blend_clear);
+   r->pipe->delete_blend_state(r->pipe, r->blend_add);
+   r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
+}
+
+bool
+vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
+           unsigned buffer_width, unsigned buffer_height,
+           unsigned macroblock_size, float scale)
+{
+   assert(renderer);
+   assert(pipe);
+
+   memset(renderer, 0, sizeof(struct vl_mc));
+
+   renderer->pipe = pipe;
+   renderer->buffer_width = buffer_width;
+   renderer->buffer_height = buffer_height;
+   renderer->macroblock_size = macroblock_size;
+
+   if (!init_pipe_state(renderer))
+      goto error_pipe_state;
+
+   renderer->vs_ref = create_ref_vert_shader(renderer);
+   if (!renderer->vs_ref)
+      goto error_vs_ref;
+
+   renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer);
+   if (!renderer->vs_ycbcr)
+      goto error_vs_ycbcr;
+
+   renderer->fs_ref = create_ref_frag_shader(renderer);
+   if (!renderer->fs_ref)
+      goto error_fs_ref;
+
+   renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale);
+   if (!renderer->fs_ycbcr)
+      goto error_fs_ycbcr;
+
+   return true;
+
+error_fs_ycbcr:
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
+
+error_fs_ref:
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
+
+error_vs_ycbcr:
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
+
+error_vs_ref:
+   cleanup_pipe_state(renderer);
+
+error_pipe_state:
+   return false;
+}
+
+void
+vl_mc_cleanup(struct vl_mc *renderer)
+{
+   assert(renderer);
+
+   cleanup_pipe_state(renderer);
+
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
+   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
+}
+
+bool
+vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer,
+                  struct pipe_sampler_view *source)
+{
+   assert(renderer && buffer);
+   assert(source);
+
+   buffer->renderer = renderer;
+
+   buffer->viewport.scale[2] = 1;
+   buffer->viewport.scale[3] = 1;
+   buffer->viewport.translate[0] = 0;
+   buffer->viewport.translate[1] = 0;
+   buffer->viewport.translate[2] = 0;
+   buffer->viewport.translate[3] = 0;
+
+   buffer->fb_state.nr_cbufs = 1;
+   buffer->fb_state.zsbuf = NULL;
+
+   pipe_sampler_view_reference(&buffer->source, source);
+
+   return true;
+}
+
+void
+vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer)
+{
+   assert(buffer);
+
+   pipe_sampler_view_reference(&buffer->source, NULL);
+}
+
+void
+vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface)
+{
+   assert(buffer && surface);
+
+   buffer->surface_cleared = false;
+
+   buffer->viewport.scale[0] = surface->width;
+   buffer->viewport.scale[1] = surface->height;
+
+   buffer->fb_state.width = surface->width;
+   buffer->fb_state.height = surface->height;
+   buffer->fb_state.cbufs[0] = surface;
+}
+
+static void
+prepare_pipe_4_rendering(struct vl_mc_buffer *buffer)
+{
+   struct vl_mc *renderer;
+
+   assert(buffer);
+
+   renderer = buffer->renderer;
+   renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
+
+   if (buffer->surface_cleared) /* already drawn to: blend_add keeps the destination (dst factor ONE) */
+      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add);
+   else { /* first draw since vl_mc_set_surface(): blend_clear drops the destination (dst factor ZERO) */
+      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear);
+      buffer->surface_cleared = true;
+   }
+
+   renderer->pipe->set_framebuffer_state(renderer->pipe, &buffer->fb_state);
+   renderer->pipe->set_viewport_state(renderer->pipe, &buffer->viewport);
+}
+
+void
+vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref,
+                 unsigned not_empty_start_instance, unsigned not_empty_num_instances,
+                 unsigned empty_start_instance, unsigned empty_num_instances)
+{
+   struct vl_mc *renderer;
+
+   assert(buffer && ref);
+
+   if (not_empty_num_instances == 0 && empty_num_instances == 0)
+      return;
+
+   prepare_pipe_4_rendering(buffer);
+
+   renderer = buffer->renderer;
+
+   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ref);
+   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref);
+
+   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &ref);
+   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ref);
+
+   if (not_empty_num_instances > 0)
+      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
+                                 not_empty_start_instance, not_empty_num_instances);
+
+   if (empty_num_instances > 0)
+      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
+                                 empty_start_instance, empty_num_instances);
+}
+
+void
+vl_mc_render_ycbcr(struct vl_mc_buffer *buffer,
+                   unsigned not_empty_start_instance, unsigned not_empty_num_instances)
+{
+   struct vl_mc *renderer;
+
+   assert(buffer);
+
+   if (not_empty_num_instances == 0)
+      return;
+
+   prepare_pipe_4_rendering(buffer);
+
+   renderer = buffer->renderer;
+
+   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr);
+   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
+
+   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &buffer->source);
+   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr);
+
+   util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
+                              not_empty_start_instance, not_empty_num_instances);
+}
diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h
new file mode 100644
index 00000000000..e5b16b5b9da
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mc.h
@@ -0,0 +1,85 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_mc_h
+#define vl_mc_h
+
+#include <pipe/p_state.h>
+#include <pipe/p_video_state.h>
+
+#include "vl_types.h"
+
+struct pipe_context;
+
+struct vl_mc
+{
+   struct pipe_context *pipe;
+   unsigned buffer_width;
+   unsigned buffer_height;
+   unsigned macroblock_size;
+
+   void *rs_state;
+
+   void *blend_clear, *blend_add;
+   void *vs_ref, *vs_ycbcr;
+   void *fs_ref, *fs_ycbcr;
+   void *sampler_ref, *sampler_ycbcr;
+};
+
+struct vl_mc_buffer
+{
+   struct vl_mc *renderer;
+
+   bool surface_cleared;
+
+   struct pipe_viewport_state viewport;
+   struct pipe_framebuffer_state fb_state;
+
+   struct pipe_sampler_view *source;
+};
+
+bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
+                unsigned picture_width, unsigned picture_height,
+                unsigned macroblock_size, float scale);
+
+void vl_mc_cleanup(struct vl_mc *renderer);
+
+bool vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer,
+                       struct pipe_sampler_view *source);
+
+void vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer);
+
+void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface);
+
+void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref,
+                      unsigned not_empty_start_instance, unsigned not_empty_num_instances,
+                      unsigned empty_start_instance, unsigned empty_num_instances);
+
+void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer,
+                        unsigned not_empty_start_instance, unsigned not_empty_num_instances);
+
+#endif /* vl_mc_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 6f93c5a01ab..4d494b0bd2a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -31,7 +31,7 @@
 #include <pipe/p_video_context.h>
 
 #include "vl_idct.h"
-#include "vl_mpeg12_mc_renderer.h"
+#include "vl_mc.h"
 
 #include "vl_vertex_buffers.h"
 #include "vl_video_buffer.h"
@@ -56,7 +56,7 @@ struct vl_mpeg12_decoder
    void *ves_mv[2];
 
    struct vl_idct idct_y, idct_c;
-   struct vl_mpeg12_mc_renderer mc_y, mc_c;
+   struct vl_mc mc_y, mc_c;
 
    void *dsa;
    void *blend;
@@ -81,7 +81,7 @@ struct vl_mpeg12_buffer
    } vertex_bufs;
 
    struct vl_idct_buffer idct[VL_MAX_PLANES];
-   struct vl_mpeg12_mc_buffer mc[VL_MAX_PLANES];
+   struct vl_mc_buffer mc[VL_MAX_PLANES];
 
    struct pipe_transfer *tex_transfer[VL_MAX_PLANES];
    short *texels[VL_MAX_PLANES];
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
deleted file mode 100644
index eae546aefcc..00000000000
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ /dev/null
@@ -1,668 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include <assert.h>
-
-#include <pipe/p_context.h>
-
-#include <util/u_sampler.h>
-#include <util/u_draw.h>
-
-#include <tgsi/tgsi_ureg.h>
-
-#include "vl_defines.h"
-#include "vl_vertex_buffers.h"
-#include "vl_mpeg12_mc_renderer.h"
-
-enum VS_OUTPUT
-{
-   VS_O_VPOS,
-   VS_O_LINE,
-   VS_O_VTOP,
-   VS_O_VBOTTOM
-};
-
-static struct ureg_dst
-calc_position(struct vl_mpeg12_mc_renderer *r, struct ureg_program *shader)
-{
-   struct ureg_src block_scale;
-   struct ureg_src vrect, vpos;
-   struct ureg_dst t_vpos;
-   struct ureg_dst o_vpos;
-
-   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
-   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
-
-   t_vpos = ureg_DECL_temporary(shader);
-
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
-
-   /*
-    * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
-    *
-    * t_vpos = (vpos + vrect) * block_scale
-    * o_vpos.xy = t_vpos
-    * o_vpos.zw = vpos
-    */
-   block_scale = ureg_imm2f(shader,
-      (float)MACROBLOCK_WIDTH / r->buffer_width,
-      (float)MACROBLOCK_HEIGHT / r->buffer_height);
-
-   ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
-   ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
-
-   return t_vpos;
-}
-
-static void *
-create_ycbcr_vert_shader(struct vl_mpeg12_mc_renderer *r)
-{
-   struct ureg_program *shader;
-   struct ureg_src block_scale;
-   struct ureg_src vrect, vpos, eb, flags;
-   struct ureg_dst t_vpos, t_vtex;
-   struct ureg_dst o_line, o_vtex[2];
-   unsigned label;
-
-   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
-   if (!shader)
-      return NULL;
-
-   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
-   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
-   eb = ureg_DECL_vs_input(shader, VS_I_EB);
-   flags = ureg_DECL_vs_input(shader, VS_I_FLAGS);
-
-   t_vpos = calc_position(r, shader);
-   t_vtex = ureg_DECL_temporary(shader);
-
-   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
-   o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
-   o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
-
-   /*
-    * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
-    *
-    * o_line.x = interlaced
-    * o_line.y = vrect
-    *
-    * o_vtex[0].z = vrect.x ? eb.y : eb.x
-    * o_vtex[1].z = vrect.x ? eb.w : eb.z
-    *
-    * if(interlaced) {
-    *    t_vtex.x = vrect.x
-    *    t_vtex.y = vrect.y * 0.5
-    *    t_vtex += vpos
-    *
-    *    o_vtex[0].xy = t_vtex * block_scale
-    *
-    *    t_vtex.y += 0.5
-    *    o_vtex[1].xy = t_vtex * block_scale
-    * } else {
-    *    o_vtex[0..1].xy = t_vpos
-    * }
-    * o_vtex[2].xy = t_vpos
-    *
-    */
-   block_scale = ureg_imm2f(shader,
-      (float)MACROBLOCK_WIDTH / r->buffer_width,
-      (float)MACROBLOCK_HEIGHT / r->buffer_height);
-
-   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), flags, ureg_imm1f(shader, 0.5f));
-   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect);
-
-   ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   ureg_CMP(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_Z),
-            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
-            ureg_scalar(eb, TGSI_SWIZZLE_Y),
-            ureg_scalar(eb, TGSI_SWIZZLE_X));
-
-   ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   ureg_CMP(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_Z),
-            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
-            ureg_scalar(eb, TGSI_SWIZZLE_W),
-            ureg_scalar(eb, TGSI_SWIZZLE_Z));
-
-   if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO
-      ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_Y), &label);
-
-         ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
-         ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
-         ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
-         ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
-         ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
-         ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
-
-         ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
-            ureg_scalar(vrect, TGSI_SWIZZLE_Y),
-            ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
-
-      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-      ureg_ENDIF(shader);
-   }
-
-   ureg_release_temporary(shader, t_vtex);
-   ureg_release_temporary(shader, t_vpos);
-
-   ureg_END(shader);
-
-   return ureg_create_shader_and_destroy(shader, r->pipe);
-}
-
-static void *
-create_ref_vert_shader(struct vl_mpeg12_mc_renderer *r)
-{
-   struct ureg_program *shader;
-   struct ureg_src mv_scale;
-   struct ureg_src vrect, vmv[2];
-   struct ureg_dst t_vpos;
-   struct ureg_dst o_vpos, o_line, o_vmv[2];
-   unsigned i;
-
-   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
-   if (!shader)
-      return NULL;
-
-   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
-   ureg_DECL_vs_input(shader, VS_I_EB);
-   ureg_DECL_vs_input(shader, VS_I_FLAGS);
-   vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
-   vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
-
-   t_vpos = calc_position(r, shader);
-
-   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
-   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
-   o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
-   o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
-
-   /*
-    * mv_scale = 0.5 / (dst.width, dst.height);
-    *
-    * // Apply motion vectors
-    * o_vmv[0..3] = t_vpos + vmv[0..3] * mv_scale
-    *
-    * o_line.y = vrect
-    *
-    */
-
-   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
-      vrect, ureg_imm1f(shader, r->macroblock_size / 2));
-
-   mv_scale = ureg_imm4f(shader,
-      0.5f / r->buffer_width,
-      0.5f / r->buffer_height,
-      1.0f / 4.0f,
-      1.0f / 255.0f);
-
-   for (i = 0; i < 2; ++i) {
-      ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
-      ureg_MUL(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_ZW), mv_scale, vmv[i]);
-   }
-
-   ureg_release_temporary(shader, t_vpos);
-
-   ureg_END(shader);
-
-   return ureg_create_shader_and_destroy(shader, r->pipe);
-}
-
-static struct ureg_dst
-calc_field(struct ureg_program *shader)
-{
-   struct ureg_dst tmp;
-   struct ureg_src line;
-
-   tmp = ureg_DECL_temporary(shader);
-
-   line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
-
-   /*
-    * line.x is flag for intra frames
-    * line.y going from 0 to 1 if not interlaced
-    * line.y going from 0 to 8 in steps of 0.5 if interlaced
-    *
-    * tmp.xy = fraction(line)
-    * tmp.xy = tmp.xy >= 0.5 ? 1 : 0
-    */
-   ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), line);
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
-
-   return tmp;
-}
-
-static void *
-create_ycbcr_frag_shader(struct vl_mpeg12_mc_renderer *r, float scale)
-{
-   struct ureg_program *shader;
-   struct ureg_src tc[2], sampler;
-   struct ureg_dst texel, t_tc, field;
-   struct ureg_dst fragment;
-   unsigned label;
-
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
-
-   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
-   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
-
-   sampler = ureg_DECL_sampler(shader, 0);
-
-   t_tc = ureg_DECL_temporary(shader);
-   texel = ureg_DECL_temporary(shader);
-
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
-   field = calc_field(shader);
-
-   /*
-    * texel.y  = tex(field.y ? tc[1] : tc[0], sampler[0])
-    * texel.cb = tex(tc[2], sampler[1])
-    * texel.cr = tex(tc[2], sampler[2])
-    */
-
-   ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-            tc[1], tc[0]);
-
-   ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f));
-
-   ureg_MOV(shader, fragment, ureg_imm4f(shader, 0.0f, 0.0f, 0.0f, 1.0f));
-   ureg_IF(shader, ureg_scalar(ureg_src(t_tc), TGSI_SWIZZLE_Z), &label);
-
-      ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
-
-      if (scale != 1.0f)
-         ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
-                  ureg_src(texel), ureg_imm1f(shader, scale),
-                  ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X));
-      else
-         ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
-                  ureg_src(texel), ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X));
-
-   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-   ureg_ENDIF(shader);
-
-   ureg_release_temporary(shader, t_tc);
-   ureg_release_temporary(shader, texel);
-
-   return ureg_create_shader_and_destroy(shader, r->pipe);
-}
-
-static void *
-create_ref_frag_shader(struct vl_mpeg12_mc_renderer *r)
-{
-   const float y_scale =
-      r->buffer_height / 2 *
-      r->macroblock_size / MACROBLOCK_HEIGHT;
-
-   struct ureg_program *shader;
-   struct ureg_src tc[2], sampler;
-   struct ureg_dst ref, field;
-   struct ureg_dst fragment;
-   unsigned label;
-
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
-
-   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
-   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
-
-   sampler = ureg_DECL_sampler(shader, 0);
-   ref = ureg_DECL_temporary(shader);
-
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
-   field = calc_field(shader);
-
-   /*
-    * ref = field.z ? tc[1] : tc[0]
-    *
-    * // Adjust tc acording to top/bottom field selection
-    * if (|ref.z|) {
-    *    ref.y *= y_scale
-    *    ref.y = floor(ref.y)
-    *    ref.y += ref.z
-    *    ref.y /= y_scale
-    * }
-    * fragment.xyz = tex(ref, sampler[0])
-    */
-   ureg_CMP(shader, ureg_writemask(ref, TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-            tc[1], tc[0]);
-   ureg_CMP(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-            tc[1], tc[0]);
-
-   ureg_IF(shader, ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z), &label);
-
-      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
-               ureg_src(ref), ureg_imm1f(shader, y_scale));
-      ureg_FLR(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y), ureg_src(ref));
-      ureg_ADD(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
-               ureg_src(ref), ureg_scalar(ureg_src(ref), TGSI_SWIZZLE_Z));
-      ureg_MUL(shader, ureg_writemask(ref, TGSI_WRITEMASK_Y),
-               ureg_src(ref), ureg_imm1f(shader, 1.0f / y_scale));
-
-   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-   ureg_ENDIF(shader);
-
-   ureg_TEX(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), TGSI_TEXTURE_2D, ureg_src(ref), sampler);
-
-   ureg_release_temporary(shader, ref);
-
-   ureg_release_temporary(shader, field);
-   ureg_END(shader);
-
-   return ureg_create_shader_and_destroy(shader, r->pipe);
-}
-
-static bool
-init_pipe_state(struct vl_mpeg12_mc_renderer *r)
-{
-   struct pipe_sampler_state sampler;
-   struct pipe_blend_state blend;
-   struct pipe_rasterizer_state rs_state;
-
-   assert(r);
-
-   memset(&sampler, 0, sizeof(sampler));
-   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
-   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
-   sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
-   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
-   sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
-   sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
-   sampler.compare_func = PIPE_FUNC_ALWAYS;
-   sampler.normalized_coords = 1;
-   r->sampler_ref = r->pipe->create_sampler_state(r->pipe, &sampler);
-   if (!r->sampler_ref)
-      goto error_sampler_ref;
-
-   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
-   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-   r->sampler_ycbcr = r->pipe->create_sampler_state(r->pipe, &sampler);
-   if (!r->sampler_ycbcr)
-      goto error_sampler_ycbcr;
-
-   memset(&blend, 0, sizeof blend);
-   blend.independent_blend_enable = 0;
-   blend.rt[0].blend_enable = 1;
-   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
-   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
-   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
-   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
-   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
-   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
-   blend.logicop_enable = 0;
-   blend.logicop_func = PIPE_LOGICOP_CLEAR;
-   blend.rt[0].colormask = PIPE_MASK_RGBA;
-   blend.dither = 0;
-   r->blend_clear = r->pipe->create_blend_state(r->pipe, &blend);
-   if (!r->blend_clear)
-      goto error_blend_clear;
-
-   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
-   r->blend_add = r->pipe->create_blend_state(r->pipe, &blend);
-   if (!r->blend_add)
-      goto error_blend_add;
-
-   memset(&rs_state, 0, sizeof(rs_state));
-   /*rs_state.sprite_coord_enable */
-   rs_state.sprite_coord_mode = PIPE_SPRITE_COORD_UPPER_LEFT;
-   rs_state.point_quad_rasterization = true;
-   rs_state.point_size = BLOCK_WIDTH;
-   rs_state.gl_rasterization_rules = true;
-   r->rs_state = r->pipe->create_rasterizer_state(r->pipe, &rs_state);
-   if (!r->rs_state)
-      goto error_rs_state;
-
-   return true;
-
-error_rs_state:
-   r->pipe->delete_blend_state(r->pipe, r->blend_add);
-
-error_blend_add:
-   r->pipe->delete_blend_state(r->pipe, r->blend_clear);
-
-error_blend_clear:
-   r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
-
-error_sampler_ref:
-   r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr);
-
-error_sampler_ycbcr:
-   return false;
-}
-
-static void
-cleanup_pipe_state(struct vl_mpeg12_mc_renderer *r)
-{
-   assert(r);
-
-   r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
-   r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr);
-   r->pipe->delete_blend_state(r->pipe, r->blend_clear);
-   r->pipe->delete_blend_state(r->pipe, r->blend_add);
-   r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
-}
-
-bool
-vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
-           unsigned buffer_width, unsigned buffer_height,
-           unsigned macroblock_size, float scale)
-{
-   assert(renderer);
-   assert(pipe);
-
-   memset(renderer, 0, sizeof(struct vl_mpeg12_mc_renderer));
-
-   renderer->pipe = pipe;
-   renderer->buffer_width = buffer_width;
-   renderer->buffer_height = buffer_height;
-   renderer->macroblock_size = macroblock_size;
-
-   if (!init_pipe_state(renderer))
-      goto error_pipe_state;
-
-   renderer->vs_ref = create_ref_vert_shader(renderer);
-   if (!renderer->vs_ref)
-      goto error_vs_ref;
-
-   renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer);
-   if (!renderer->vs_ycbcr)
-      goto error_vs_ycbcr;
-
-   renderer->fs_ref = create_ref_frag_shader(renderer);
-   if (!renderer->fs_ref)
-      goto error_fs_ref;
-
-   renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale);
-   if (!renderer->fs_ycbcr)
-      goto error_fs_ycbcr;
-
-   return true;
-
-error_fs_ycbcr:
-   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
-
-error_fs_ref:
-   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
-
-error_vs_ycbcr:
-   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
-
-error_vs_ref:
-   cleanup_pipe_state(renderer);
-
-error_pipe_state:
-   return false;
-}
-
-void
-vl_mc_cleanup(struct vl_mpeg12_mc_renderer *renderer)
-{
-   assert(renderer);
-
-   cleanup_pipe_state(renderer);
-
-   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ref);
-   renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
-   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
-   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
-}
-
-bool
-vl_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                  struct pipe_sampler_view *source)
-{
-   assert(renderer && buffer);
-   assert(source);
-
-   buffer->renderer = renderer;
-
-   buffer->viewport.scale[2] = 1;
-   buffer->viewport.scale[3] = 1;
-   buffer->viewport.translate[0] = 0;
-   buffer->viewport.translate[1] = 0;
-   buffer->viewport.translate[2] = 0;
-   buffer->viewport.translate[3] = 0;
-
-   buffer->fb_state.nr_cbufs = 1;
-   buffer->fb_state.zsbuf = NULL;
-
-   pipe_sampler_view_reference(&buffer->source, source);
-
-   return true;
-}
-
-void
-vl_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer)
-{
-   assert(buffer);
-
-   pipe_sampler_view_reference(&buffer->source, NULL);
-}
-
-void
-vl_mc_set_surface(struct vl_mpeg12_mc_buffer *buffer, struct pipe_surface *surface)
-{
-   assert(buffer && surface);
-
-   buffer->surface_cleared = false;
-
-   buffer->viewport.scale[0] = surface->width;
-   buffer->viewport.scale[1] = surface->height;
-
-   buffer->fb_state.width = surface->width;
-   buffer->fb_state.height = surface->height;
-   buffer->fb_state.cbufs[0] = surface;
-}
-
-static void
-prepare_pipe_4_rendering(struct vl_mpeg12_mc_buffer *buffer)
-{
-   struct vl_mpeg12_mc_renderer *renderer;
-
-   assert(buffer);
-
-   renderer = buffer->renderer;
-   renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
-
-   if (buffer->surface_cleared)
-      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add);
-   else {
-      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear);
-      buffer->surface_cleared = true;
-   }
-
-   renderer->pipe->set_framebuffer_state(renderer->pipe, &buffer->fb_state);
-   renderer->pipe->set_viewport_state(renderer->pipe, &buffer->viewport);
-}
-
-void
-vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer, struct pipe_sampler_view *ref,
-                 unsigned not_empty_start_instance, unsigned not_empty_num_instances,
-                 unsigned empty_start_instance, unsigned empty_num_instances)
-{
-   struct vl_mpeg12_mc_renderer *renderer;
-
-   assert(buffer && ref);
-
-   if (not_empty_num_instances == 0 && empty_num_instances == 0)
-      return;
-
-   prepare_pipe_4_rendering(buffer);
-
-   renderer = buffer->renderer;
-
-   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ref);
-   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ref);
-
-   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &ref);
-   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ref);
-
-   if (not_empty_num_instances > 0)
-      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                                 not_empty_start_instance, not_empty_num_instances);
-
-   if (empty_num_instances > 0)
-      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                                 empty_start_instance, empty_num_instances);
-}
-
-void
-vl_mc_render_ycbcr(struct vl_mpeg12_mc_buffer *buffer,
-                   unsigned not_empty_start_instance, unsigned not_empty_num_instances)
-{
-   struct vl_mpeg12_mc_renderer *renderer;
-
-   assert(buffer);
-
-   if (not_empty_num_instances == 0)
-      return;
-
-   prepare_pipe_4_rendering(buffer);
-
-   renderer = buffer->renderer;
-
-   renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr);
-   renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
-
-   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &buffer->source);
-   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr);
-
-   util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                              not_empty_start_instance, not_empty_num_instances);
-}
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
deleted file mode 100644
index 4137ac407d2..00000000000
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef vl_mpeg12_mc_renderer_h
-#define vl_mpeg12_mc_renderer_h
-
-#include <pipe/p_state.h>
-#include <pipe/p_video_state.h>
-
-#include "vl_types.h"
-
-struct pipe_context;
-
-struct vl_mpeg12_mc_renderer
-{
-   struct pipe_context *pipe;
-   unsigned buffer_width;
-   unsigned buffer_height;
-   unsigned macroblock_size;
-
-   void *rs_state;
-
-   void *blend_clear, *blend_add;
-   void *vs_ref, *vs_ycbcr;
-   void *fs_ref, *fs_ycbcr;
-   void *sampler_ref, *sampler_ycbcr;
-};
-
-struct vl_mpeg12_mc_buffer
-{
-   struct vl_mpeg12_mc_renderer *renderer;
-
-   bool surface_cleared;
-
-   struct pipe_viewport_state viewport;
-   struct pipe_framebuffer_state fb_state;
-
-   struct pipe_sampler_view *source;
-};
-
-bool vl_mc_init(struct vl_mpeg12_mc_renderer *renderer, struct pipe_context *pipe,
-                unsigned picture_width, unsigned picture_height,
-                unsigned macroblock_size, float scale);
-
-void vl_mc_cleanup(struct vl_mpeg12_mc_renderer *renderer);
-
-bool vl_mc_init_buffer(struct vl_mpeg12_mc_renderer *renderer, struct vl_mpeg12_mc_buffer *buffer,
-                       struct pipe_sampler_view *source);
-
-void vl_mc_cleanup_buffer(struct vl_mpeg12_mc_buffer *buffer);
-
-void vl_mc_set_surface(struct vl_mpeg12_mc_buffer *buffer, struct pipe_surface *surface);
-
-void vl_mc_render_ref(struct vl_mpeg12_mc_buffer *buffer, struct pipe_sampler_view *ref,
-                      unsigned not_empty_start_instance, unsigned not_empty_num_instances,
-                      unsigned empty_start_instance, unsigned empty_num_instances);
-
-void vl_mc_render_ycbcr(struct vl_mpeg12_mc_buffer *buffer,
-                        unsigned not_empty_start_instance, unsigned not_empty_num_instances);
-
-#endif /* vl_mpeg12_mc_renderer_h */
-- 
cgit v1.2.3


From 0a2310b375068694d5700395aededc3fe68a0f3a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 17 Apr 2011 13:01:56 +0200
Subject: [g3dvl] move blender state into idct code

---
 src/gallium/auxiliary/vl/vl_idct.c           | 27 ++++++++++++++++++++++++++-
 src/gallium/auxiliary/vl/vl_idct.h           |  1 +
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 20 --------------------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |  1 -
 4 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index a20263bc182..4a500a7489c 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -400,8 +400,9 @@ cleanup_shaders(struct vl_idct *idct)
 static bool
 init_state(struct vl_idct *idct)
 {
-   struct pipe_sampler_state sampler;
+   struct pipe_blend_state blend;
    struct pipe_rasterizer_state rs_state;
+   struct pipe_sampler_state sampler;
    unsigned i;
 
    assert(idct);
@@ -412,6 +413,25 @@ init_state(struct vl_idct *idct)
    if (!idct->rs_state)
       goto error_rs_state;
 
+   memset(&blend, 0, sizeof blend);
+
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 0;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   /* Needed to allow color writes to FB, even if blending disabled */
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   idct->blend = idct->pipe->create_blend_state(idct->pipe, &blend);
+   if (!idct->blend)
+      goto error_blend;
+
    for (i = 0; i < 2; ++i) {
       memset(&sampler, 0, sizeof(sampler));
       sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
@@ -437,6 +457,9 @@ error_samplers:
 
+   idct->pipe->delete_blend_state(idct->pipe, idct->blend);
+
+error_blend: /* blend creation failed: only rs_state exists and must be freed */
    idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
 
 error_rs_state:
    return false;
 }
@@ -450,6 +473,7 @@ cleanup_state(struct vl_idct *idct)
       idct->pipe->delete_sampler_state(idct->pipe, idct->samplers[i]);
 
    idct->pipe->delete_rasterizer_state(idct->pipe, idct->rs_state);
+   idct->pipe->delete_blend_state(idct->pipe, idct->blend);
 }
 
 static bool
@@ -691,6 +715,7 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
       num_verts = idct->blocks_x * idct->blocks_y * 4;
 
       idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
+      idct->pipe->bind_blend_state(idct->pipe, idct->blend);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
 
       /* first stage */
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 0875f17476c..7f00e3e28b4 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -43,6 +43,7 @@ struct vl_idct
    unsigned nr_of_render_targets;
 
    void *rs_state;
+   void *blend;
 
    void *samplers[2];
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 08b740c6199..6d10cd16e32 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -276,7 +276,6 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    dec->pipe->bind_vs_state(dec->pipe, NULL);
    dec->pipe->bind_fs_state(dec->pipe, NULL);
 
-   dec->pipe->delete_blend_state(dec->pipe, dec->blend);
    dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa);
 
    vl_mc_cleanup(&dec->mc_y);
@@ -489,7 +488,6 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
          vl_mc_render_ref(&buf->mc[i], sv[j][i], ne_start, ne_num, e_start, e_num);
       }
 
-      dec->pipe->bind_blend_state(dec->pipe, dec->blend);
       dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_eb[i]);
 
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
@@ -515,29 +513,11 @@ vl_mpeg12_decoder_clear_buffer(struct pipe_video_decode_buffer *buffer)
 static bool
 init_pipe_state(struct vl_mpeg12_decoder *dec)
 {
-   struct pipe_blend_state blend;
    struct pipe_depth_stencil_alpha_state dsa;
    unsigned i;
 
    assert(dec);
 
-   memset(&blend, 0, sizeof blend);
-
-   blend.independent_blend_enable = 0;
-   blend.rt[0].blend_enable = 0;
-   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
-   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
-   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
-   blend.logicop_enable = 0;
-   blend.logicop_func = PIPE_LOGICOP_CLEAR;
-   /* Needed to allow color writes to FB, even if blending disabled */
-   blend.rt[0].colormask = PIPE_MASK_RGBA;
-   blend.dither = 0;
-   dec->blend = dec->pipe->create_blend_state(dec->pipe, &blend);
-
    memset(&dsa, 0, sizeof dsa);
    dsa.depth.enabled = 0;
    dsa.depth.writemask = 0;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 4d494b0bd2a..ffb9e56a13c 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -59,7 +59,6 @@ struct vl_mpeg12_decoder
    struct vl_mc mc_y, mc_c;
 
    void *dsa;
-   void *blend;
 };
 
 struct vl_mpeg12_buffer
-- 
cgit v1.2.3


From b8a6e0e6fc451096d684a1e18529ab4879cdba0a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 17 Apr 2011 18:53:22 +0200
Subject: [g3dvl] give mv their own vertex buffer back

---
 src/gallium/auxiliary/vl/vl_defines.h          |   3 +
 src/gallium/auxiliary/vl/vl_mc.c               |  25 +--
 src/gallium/auxiliary/vl/vl_mc.h               |   7 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |  54 +++---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h   |  10 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c   | 230 +++++++++++++++++--------
 src/gallium/auxiliary/vl/vl_vertex_buffers.h   |  32 ++--
 src/gallium/auxiliary/vl/vl_video_buffer.h     |   2 +-
 src/gallium/include/pipe/p_video_state.h       |  14 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c |  20 +--
 10 files changed, 246 insertions(+), 151 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_defines.h b/src/gallium/auxiliary/vl/vl_defines.h
index 668991f904f..7568db027e6 100644
--- a/src/gallium/auxiliary/vl/vl_defines.h
+++ b/src/gallium/auxiliary/vl/vl_defines.h
@@ -35,4 +35,7 @@
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
 
+#define VL_MAX_PLANES 3
+#define VL_MAX_REF_FRAMES 2
+
 #endif
diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index 707a4a27077..2624d0502c1 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -612,17 +612,12 @@ prepare_pipe_4_rendering(struct vl_mc_buffer *buffer)
 }
 
 void
-vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref,
-                 unsigned not_empty_start_instance, unsigned not_empty_num_instances,
-                 unsigned empty_start_instance, unsigned empty_num_instances)
+vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref)
 {
    struct vl_mc *renderer;
 
    assert(buffer && ref);
 
-   if (not_empty_num_instances == 0 && empty_num_instances == 0)
-      return;
-
    prepare_pipe_4_rendering(buffer);
 
    renderer = buffer->renderer;
@@ -633,24 +628,19 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref,
    renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &ref);
    renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ref);
 
-   if (not_empty_num_instances > 0)
-      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                                 not_empty_start_instance, not_empty_num_instances);
-
-   if (empty_num_instances > 0)
-      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                                 empty_start_instance, empty_num_instances);
+   util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0,
+                              renderer->buffer_width / MACROBLOCK_WIDTH *
+                              renderer->buffer_height / MACROBLOCK_HEIGHT);
 }
 
 void
-vl_mc_render_ycbcr(struct vl_mc_buffer *buffer,
-                   unsigned not_empty_start_instance, unsigned not_empty_num_instances)
+vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned num_instances)
 {
    struct vl_mc *renderer;
 
    assert(buffer);
 
-   if (not_empty_num_instances == 0)
+   if (num_instances == 0)
       return;
 
    prepare_pipe_4_rendering(buffer);
@@ -663,6 +653,5 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer,
    renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &buffer->source);
    renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr);
 
-   util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4,
-                              not_empty_start_instance, not_empty_num_instances);
+   util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
 }
diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h
index e5b16b5b9da..bc2b0e7f149 100644
--- a/src/gallium/auxiliary/vl/vl_mc.h
+++ b/src/gallium/auxiliary/vl/vl_mc.h
@@ -75,11 +75,8 @@ void vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer);
 
 void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface);
 
-void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref,
-                      unsigned not_empty_start_instance, unsigned not_empty_num_instances,
-                      unsigned empty_start_instance, unsigned empty_num_instances);
+void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref);
 
-void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer,
-                        unsigned not_empty_start_instance, unsigned not_empty_num_instances);
+void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned num_instances);
 
 #endif /* vl_mc_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 6d10cd16e32..1d8f0f92c37 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -293,6 +293,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
       dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv[i]);
 
    pipe_resource_reference(&dec->quads.buffer, NULL);
+   pipe_resource_reference(&dec->pos.buffer, NULL);
 
    FREE(dec);
 }
@@ -389,15 +390,9 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    buffer->base.add_macroblocks = vl_mpeg12_buffer_add_macroblocks;
    buffer->base.unmap = vl_mpeg12_buffer_unmap;
 
-   buffer->vertex_bufs.individual.quad.stride = dec->quads.stride;
-   buffer->vertex_bufs.individual.quad.buffer_offset = dec->quads.buffer_offset;
-   pipe_resource_reference(&buffer->vertex_bufs.individual.quad.buffer, dec->quads.buffer);
-
-   buffer->vertex_bufs.individual.stream = vl_vb_init(&buffer->vertex_stream, dec->pipe,
-                                                      dec->base.width / MACROBLOCK_WIDTH *
-                                                      dec->base.height / MACROBLOCK_HEIGHT);
-   if (!buffer->vertex_bufs.individual.stream.buffer)
-      goto error_vertex_stream;
+   vl_vb_init(&buffer->vertex_stream, dec->pipe,
+              dec->base.width / MACROBLOCK_WIDTH,
+              dec->base.height / MACROBLOCK_HEIGHT);
 
    formats[0] = formats[1] = formats[2] =dec->mc_source_format;
    buffer->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
@@ -461,7 +456,9 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    struct pipe_sampler_view **sv[2];
    struct pipe_surface **surfaces;
 
-   unsigned ne_start, ne_num, e_start, e_num;
+   struct pipe_vertex_buffer vb[3];
+
+   unsigned num_instances;
    unsigned i, j;
 
    assert(buf);
@@ -474,9 +471,10 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
    surfaces = dst->get_surfaces(dst);
 
-   vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
+   num_instances = vl_vb_restart(&buf->vertex_stream);
 
-   dec->pipe->set_vertex_buffers(dec->pipe, 2, buf->vertex_bufs.all);
+   vb[0] = dec->quads;
+   vb[1] = dec->pos;
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
       vl_mc_set_surface(&buf->mc[i], surfaces[i]);
@@ -484,18 +482,25 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
       for (j = 0; j < 2; ++j) {
          if (sv[j] == NULL) continue;
 
+         vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
+         dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
+
          dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv[j]);
-         vl_mc_render_ref(&buf->mc[i], sv[j][i], ne_start, ne_num, e_start, e_num);
+         vl_mc_render_ref(&buf->mc[i], sv[j][i]);
       }
+   }
 
-      dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_eb[i]);
+   vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream);
+   dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
 
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_eb[i]);
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-         vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], ne_num);
-
-      vl_mc_render_ycbcr(&buf->mc[i], ne_start, ne_num);
+         vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_instances);
 
+      vl_mc_render_ycbcr(&buf->mc[i], num_instances);
    }
+
    dec->pipe->flush(dec->pipe, fence);
 }
 
@@ -503,11 +508,10 @@ static void
 vl_mpeg12_decoder_clear_buffer(struct pipe_video_decode_buffer *buffer)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
-   unsigned ne_start, ne_num, e_start, e_num;
 
    assert(buf);
 
-   vl_vb_restart(&buf->vertex_stream, &ne_start, &ne_num, &e_start, &e_num);
+   vl_vb_restart(&buf->vertex_stream);
 }
 
 static bool
@@ -691,18 +695,24 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
    dec->base.clear_buffer = vl_mpeg12_decoder_clear_buffer;
 
+   dec->base.width = align(width, MACROBLOCK_WIDTH);
+   dec->base.height = align(height, MACROBLOCK_HEIGHT);
+
    dec->pipe = pipe;
 
    dec->quads = vl_vb_upload_quads(dec->pipe, 2, 2);
+   dec->pos = vl_vb_upload_pos(
+      dec->pipe,
+      dec->base.width / MACROBLOCK_WIDTH,
+      dec->base.height / MACROBLOCK_HEIGHT
+   );
+
    for (i = 0; i < VL_MAX_PLANES; ++i)
       dec->ves_eb[i] = vl_vb_get_ves_eb(dec->pipe, i);
 
    for (i = 0; i < 2; ++i)
       dec->ves_mv[i] = vl_vb_get_ves_mv(dec->pipe, i);
 
-   dec->base.width = align(width, MACROBLOCK_WIDTH);
-   dec->base.height = align(height, MACROBLOCK_HEIGHT);
-
    /* TODO: Implement 422, 444 */
    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
    dec->empty_block_mask = &const_empty_block_mask_420;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index ffb9e56a13c..2bd5591b463 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -52,6 +52,8 @@ struct vl_mpeg12_decoder
    enum pipe_format mc_source_format;
 
    struct pipe_vertex_buffer quads;
+   struct pipe_vertex_buffer pos;
+
    void *ves_eb[VL_MAX_PLANES];
    void *ves_mv[2];
 
@@ -71,14 +73,6 @@ struct vl_mpeg12_buffer
    struct pipe_video_buffer *idct_intermediate;
    struct pipe_video_buffer *mc_source;
 
-   union
-   {
-      struct pipe_vertex_buffer all[2];
-      struct {
-         struct pipe_vertex_buffer quad, stream;
-      } individual;
-   } vertex_bufs;
-
    struct vl_idct_buffer idct[VL_MAX_PLANES];
    struct vl_mc_buffer mc[VL_MAX_PLANES];
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 84dfc9eccf6..c923686b06e 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -37,7 +37,11 @@ struct vl_vertex_stream
    uint8_t dct_type_field;
    uint8_t dummy[2];
    uint8_t eb[3][2][2];
-   struct vertex4s mv[4];
+};
+
+struct vl_mv_vertex_stream
+{
+   struct vertex4s mv[2];
 };
 
 /* vertices for a quad covering a block */
@@ -96,6 +100,52 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks
    return quad;
 }
 
+struct pipe_vertex_buffer
+vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height)
+{
+   struct pipe_vertex_buffer pos;
+   struct pipe_transfer *buf_transfer;
+   struct vertex2s *v;
+
+   unsigned x, y;
+
+   assert(pipe);
+
+   /* create buffer */
+   pos.stride = sizeof(struct vertex2s);
+   pos.buffer_offset = 0;
+   pos.buffer = pipe_buffer_create
+   (
+      pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      PIPE_USAGE_STATIC,
+      sizeof(struct vertex2s) * width * height
+   );
+
+   if(!pos.buffer)
+      return pos;
+
+   /* and fill it */
+   v = pipe_buffer_map
+   (
+      pipe,
+      pos.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   for ( y = 0; y < height; ++y) {
+      for ( x = 0; x < width; ++x, ++v) {
+         v->x = x;
+         v->y = y;
+      }
+   }
+
+   pipe_buffer_unmap(pipe, buf_transfer);
+
+   return pos;
+}
+
 static struct pipe_vertex_element
 vl_vb_get_quad_vertex_element(void)
 {
@@ -159,50 +209,83 @@ vl_vb_get_ves_mv(struct pipe_context *pipe, int motionvector)
 {
    struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
 
+   assert(pipe);
+
    memset(&vertex_elems, 0, sizeof(vertex_elems));
    vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
 
-   assert(pipe);
-
    /* Position element */
    vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
 
+   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1);
+
    /* motion vector TOP element */
-   vertex_elems[VS_I_MV_TOP].src_offset = offsetof(struct vl_vertex_stream, mv[motionvector * 2]);
    vertex_elems[VS_I_MV_TOP].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED;
 
    /* motion vector BOTTOM element */
    vertex_elems[VS_I_MV_BOTTOM].src_format = PIPE_FORMAT_R16G16B16A16_SSCALED;
 
-   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1);
+   vl_vb_element_helper(&vertex_elems[VS_I_MV_TOP], 2, 2);
 
    return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems);
 }
 
-struct pipe_vertex_buffer
-vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned size)
+void
+vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned width, unsigned height)
 {
-   struct pipe_vertex_buffer buf;
+   unsigned i;
 
    assert(buffer);
 
-   buffer->size = size;
-   buffer->num_not_empty = 0;
-   buffer->num_empty = 0;
+   buffer->width = width;
+   buffer->height = height;
+   buffer->num_instances = 0;
 
-   buf.stride = sizeof(struct vl_vertex_stream);
-   buf.buffer_offset = 0;
-   buf.buffer = pipe_buffer_create
+   buffer->resource = pipe_buffer_create
    (
       pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
       PIPE_USAGE_STREAM,
-      sizeof(struct vl_vertex_stream) * size
+      sizeof(struct vl_vertex_stream) * width * height
    );
 
-   pipe_resource_reference(&buffer->resource, buf.buffer);
+   for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
+      buffer->mv[i].resource = pipe_buffer_create
+      (
+         pipe->screen,
+         PIPE_BIND_VERTEX_BUFFER,
+         PIPE_USAGE_STREAM,
+         sizeof(struct vl_mv_vertex_stream) * width * height
+      );
+   }
 
    vl_vb_map(buffer, pipe);
+}
+
+struct pipe_vertex_buffer
+vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer)
+{
+   struct pipe_vertex_buffer buf;
+
+   assert(buffer);
+
+   buf.stride = sizeof(struct vl_vertex_stream);
+   buf.buffer_offset = 0;
+   buf.buffer = buffer->resource;
+
+   return buf;
+}
+
+struct pipe_vertex_buffer
+vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector)
+{
+   struct pipe_vertex_buffer buf;
+
+   assert(buffer);
+
+   buf.stride = sizeof(struct vl_mv_vertex_stream);
+   buf.buffer_offset = 0;
+   buf.buffer = buffer->mv[motionvector].resource;
 
    return buf;
 }
@@ -210,110 +293,119 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned
 void
 vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 {
+   unsigned i;
+
    assert(buffer && pipe);
 
-   buffer->start = pipe_buffer_map
+   buffer->buffer = pipe_buffer_map
    (
       pipe,
       buffer->resource,
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &buffer->transfer
    );
-   buffer->end = buffer->start + buffer->resource->width0 / sizeof(struct vl_vertex_stream);
+
+   for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
+      buffer->mv[i].vertex_stream = pipe_buffer_map
+      (
+         pipe,
+         buffer->mv[i].resource,
+         PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+         &buffer->mv[i].transfer
+      );
+   }
+
 }
 
 static void
-get_motion_vectors(struct pipe_mpeg12_macroblock *mb, struct vertex4s mv[4])
+get_motion_vectors(enum pipe_mpeg12_motion_type mo_type, struct pipe_motionvector *src, struct vertex4s dst[2])
 {
-   if (mb->mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-      mv[0].x = mv[1].x = mb->mv[0].top.x;
-      mv[0].y = mv[1].y = mb->mv[0].top.y;
-      mv[0].z = mv[1].z = 0;
-
-      mv[2].x = mv[3].x = mb->mv[1].top.x;
-      mv[2].y = mv[3].y = mb->mv[1].top.y;
-      mv[2].z = mv[3].z = 0;
+   if (mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
+      dst[0].x = dst[1].x = src->top.x;
+      dst[0].y = dst[1].y = src->top.y;
+      dst[0].z = dst[1].z = 0;
 
    } else {
-      mv[0].x = mb->mv[0].top.x;
-      mv[0].y = mb->mv[0].top.y;
-      mv[0].z = mb->mv[0].top.field_select ? 3 : 1;
-
-      mv[1].x = mb->mv[0].bottom.x;
-      mv[1].y = mb->mv[0].bottom.y;
-      mv[1].z = mb->mv[0].bottom.field_select ? 3 : 1;
+      dst[0].x = src->top.x;
+      dst[0].y = src->top.y;
+      dst[0].z = src->top.field_select ? 3 : 1;
 
-      mv[2].x = mb->mv[1].top.x;
-      mv[2].y = mb->mv[1].top.y;
-      mv[2].z = mb->mv[1].top.field_select ? 3 : 1;
-
-      mv[3].x = mb->mv[1].bottom.x;
-      mv[3].y = mb->mv[1].bottom.y;
-      mv[3].z = mb->mv[1].bottom.field_select ? 3 : 1;
+      dst[1].x = src->bottom.x;
+      dst[1].y = src->bottom.y;
+      dst[1].z = src->bottom.field_select ? 3 : 1;
    }
 
-   mv[0].w = mv[1].w = mb->mv[0].wheight;
-   mv[2].w = mv[3].w = mb->mv[1].wheight;
+   dst[0].w = src->top.wheight;
+   dst[1].w = src->bottom.wheight;
 }
 
 void
 vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb,
                 const unsigned (*empty_block_mask)[3][2][2])
 {
-   struct vl_vertex_stream *stream;
    unsigned i, j, k;
+   unsigned mv_pos;
 
    assert(buffer);
    assert(mb);
-   assert(buffer->num_not_empty + buffer->num_empty < buffer->size);
+   assert(buffer->num_instances < buffer->width * buffer->height);
 
-   if(mb->cbp)
-      stream = buffer->start + buffer->num_not_empty++;
-   else
-      stream = buffer->end - ++buffer->num_empty;
+   if(mb->cbp) {
+      struct vl_vertex_stream *stream;
+      stream = buffer->buffer + buffer->num_instances++;
 
-   stream->pos.x = mb->mbx;
-   stream->pos.y = mb->mby;
+      stream->pos.x = mb->mbx;
+      stream->pos.y = mb->mby;
 
-   for ( i = 0; i < 3; ++i)
-      for ( j = 0; j < 2; ++j)
-         for ( k = 0; k < 2; ++k)
-            stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]);
+      for ( i = 0; i < 3; ++i)
+         for ( j = 0; j < 2; ++j)
+            for ( k = 0; k < 2; ++k)
+               stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]);
 
-   stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
-   stream->mb_type_intra = mb->dct_intra;
+      stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
+      stream->mb_type_intra = mb->dct_intra;
+   }
 
-   get_motion_vectors(mb, stream->mv);
+   mv_pos = mb->mbx + mb->mby * buffer->width;
+   get_motion_vectors(mb->mo_type, &mb->mv[0], buffer->mv[0].vertex_stream[mv_pos].mv);
+   get_motion_vectors(mb->mo_type, &mb->mv[1], buffer->mv[1].vertex_stream[mv_pos].mv);
 }
 
 void
 vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 {
+   unsigned i;
+
    assert(buffer && pipe);
 
    pipe_buffer_unmap(pipe, buffer->transfer);
+   for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
+      pipe_buffer_unmap(pipe, buffer->mv[i].transfer);
+   }
 }
 
-void
-vl_vb_restart(struct vl_vertex_buffer *buffer,
-              unsigned *not_empty_start_instance, unsigned *not_empty_num_instances,
-              unsigned *empty_start_instance, unsigned *empty_num_instances)
+unsigned
+vl_vb_restart(struct vl_vertex_buffer *buffer)
 {
-   assert(buffer);
+   unsigned num_instances;
 
-   *not_empty_start_instance = 0;
-   *not_empty_num_instances = buffer->num_not_empty;
-   *empty_start_instance = buffer->size - buffer->num_empty;
-   *empty_num_instances = buffer->num_empty;
+   assert(buffer);
 
-   buffer->num_not_empty = 0;
-   buffer->num_empty = 0;
+   num_instances = buffer->num_instances;
+   buffer->num_instances = 0;
+   return num_instances;
 }
 
 void
 vl_vb_cleanup(struct vl_vertex_buffer *buffer)
 {
+   unsigned i;
+
    assert(buffer);
 
    pipe_resource_reference(&buffer->resource, NULL);
+
+   for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
+      pipe_resource_reference(&buffer->mv[i].resource, NULL);
+   }
 }
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 62f7bf00508..bafaff7a311 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -30,6 +30,7 @@
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
 
+#include "vl_defines.h"
 #include "vl_types.h"
 
 /* vertex buffers act as a todo list
@@ -54,25 +55,36 @@ enum VS_INPUT
 
 struct vl_vertex_buffer
 {
-   unsigned size;
-   unsigned num_not_empty;
-   unsigned num_empty;
+   unsigned width, height;
+   unsigned num_instances;
+
    struct pipe_resource *resource;
    struct pipe_transfer *transfer;
-   struct vl_vertex_stream *start;
-   struct vl_vertex_stream *end;
+   struct vl_vertex_stream *buffer;
+
+   struct {
+      struct pipe_resource       *resource;
+      struct pipe_transfer       *transfer;
+      struct vl_mv_vertex_stream *vertex_stream;
+   } mv[VL_MAX_REF_FRAMES];
 };
 
 struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe,
                                              unsigned blocks_x, unsigned blocks_y);
 
+struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height);
+
 void *vl_vb_get_ves_eb(struct pipe_context *pipe, int component);
 
 void *vl_vb_get_ves_mv(struct pipe_context *pipe, int motionvector);
 
-struct pipe_vertex_buffer vl_vb_init(struct vl_vertex_buffer *buffer,
-                                     struct pipe_context *pipe,
-                                     unsigned max_blocks);
+void vl_vb_init(struct vl_vertex_buffer *buffer,
+                struct pipe_context *pipe,
+                unsigned width, unsigned height);
+
+struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer);
+
+struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector);
 
 void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
@@ -81,9 +93,7 @@ void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macrobl
 
 void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
-void vl_vb_restart(struct vl_vertex_buffer *buffer,
-                   unsigned *not_empty_start_instance, unsigned *not_empty_num_instances,
-                   unsigned *empty_start_instance, unsigned *empty_num_instances);
+unsigned vl_vb_restart(struct vl_vertex_buffer *buffer);
 
 void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
 
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
index f5c424cf296..960acd28060 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.h
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -31,7 +31,7 @@
 #include <pipe/p_context.h>
 #include <pipe/p_video_context.h>
 
-#define VL_MAX_PLANES 3
+#include "vl_defines.h"
 
 /**
  * implementation of a planar ycbcr buffer
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 72a27938847..2cf5ea4d975 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -69,10 +69,13 @@ struct pipe_macroblock
    enum pipe_video_codec codec;
 };
 
-struct pipe_mpeg12_motionvector
+struct pipe_motionvector
 {
-   signed x, y;
-   bool field_select;
+   struct {
+      signed x, y;
+      bool field_select;
+      unsigned wheight:8;
+   } top, bottom;
 };
 
 struct pipe_mpeg12_macroblock
@@ -84,10 +87,7 @@ struct pipe_mpeg12_macroblock
    enum pipe_mpeg12_motion_type mo_type;
    bool dct_intra;
    enum pipe_mpeg12_dct_type dct_type;
-   struct {
-      unsigned wheight:8;
-      struct pipe_mpeg12_motionvector top, bottom;
-   } mv[2];
+   struct pipe_motionvector mv[2];
    unsigned cbp;
    short *blocks;
 };
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index f22d315c90d..9752497eb30 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -114,28 +114,28 @@ MacroBlocksToPipe(struct pipe_screen *screen,
 
       switch (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) {
       case XVMC_MB_TYPE_MOTION_FORWARD:
-         mb->mv[0].wheight = 255;
-         mb->mv[1].wheight = 0;
+         mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 255;
+         mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 0;
          break;
 
       case (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD):
-         mb->mv[0].wheight = 127;
-         mb->mv[1].wheight = 127;
+         mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 127;
+         mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 127;
          break;
 
       case XVMC_MB_TYPE_MOTION_BACKWARD:
-         mb->mv[0].wheight = 0;
-         mb->mv[1].wheight = 255;
+         mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 0;
+         mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 255;
          break;
 
       default:
          /* workaround for xines xxmc video out plugin */
          if (!(xvmc_mb->macroblock_type & ~XVMC_MB_TYPE_PATTERN)) {
-            mb->mv[0].wheight = 255;
-            mb->mv[1].wheight = 0;
+            mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 255;
+            mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 0;
          } else {
-            mb->mv[0].wheight = 0;
-            mb->mv[1].wheight = 0;
+            mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 0;
+            mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 0;
          }
          break;
       }
-- 
cgit v1.2.3


From 38a315b7049946d124409b377e622994feccdcb7 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 17 Apr 2011 21:31:31 +0200
Subject: [g3dvl] no need for separate mv vertex states any more

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 8 +++-----
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h | 2 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c | 2 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.h | 2 +-
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 1d8f0f92c37..10c9c646a51 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -289,8 +289,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    for (i = 0; i < VL_MAX_PLANES; ++i)
       dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_eb[i]);
 
-   for (i = 0; i < 2; ++i)
-      dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv[i]);
+   dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv);
 
    pipe_resource_reference(&dec->quads.buffer, NULL);
    pipe_resource_reference(&dec->pos.buffer, NULL);
@@ -476,6 +475,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    vb[0] = dec->quads;
    vb[1] = dec->pos;
 
+   dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
       vl_mc_set_surface(&buf->mc[i], surfaces[i]);
 
@@ -485,7 +485,6 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
          vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
          dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
 
-         dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv[j]);
          vl_mc_render_ref(&buf->mc[i], sv[j][i]);
       }
    }
@@ -710,8 +709,7 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    for (i = 0; i < VL_MAX_PLANES; ++i)
       dec->ves_eb[i] = vl_vb_get_ves_eb(dec->pipe, i);
 
-   for (i = 0; i < 2; ++i)
-      dec->ves_mv[i] = vl_vb_get_ves_mv(dec->pipe, i);
+   dec->ves_mv = vl_vb_get_ves_mv(dec->pipe);
 
    /* TODO: Implement 422, 444 */
    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 2bd5591b463..6a6fabdb188 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -55,7 +55,7 @@ struct vl_mpeg12_decoder
    struct pipe_vertex_buffer pos;
 
    void *ves_eb[VL_MAX_PLANES];
-   void *ves_mv[2];
+   void *ves_mv;
 
    struct vl_idct idct_y, idct_c;
    struct vl_mc mc_y, mc_c;
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index c923686b06e..76142559cc8 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -205,7 +205,7 @@ vl_vb_get_ves_eb(struct pipe_context *pipe, int component)
 }
 
 void *
-vl_vb_get_ves_mv(struct pipe_context *pipe, int motionvector)
+vl_vb_get_ves_mv(struct pipe_context *pipe)
 {
    struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index bafaff7a311..ce169ccf640 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -76,7 +76,7 @@ struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned w
 
 void *vl_vb_get_ves_eb(struct pipe_context *pipe, int component);
 
-void *vl_vb_get_ves_mv(struct pipe_context *pipe, int motionvector);
+void *vl_vb_get_ves_mv(struct pipe_context *pipe);
 
 void vl_vb_init(struct vl_vertex_buffer *buffer,
                 struct pipe_context *pipe,
-- 
cgit v1.2.3


From 849bc838e81f930e6f090e6c6597bb92e822b4c9 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 17 Apr 2011 23:21:32 +0200
Subject: [g3dvl] give each color component their own vertex buffer

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c |  27 +++---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |   2 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c | 119 ++++++++++++++++-----------
 src/gallium/auxiliary/vl/vl_vertex_buffers.h |  16 ++--
 4 files changed, 91 insertions(+), 73 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 10c9c646a51..7e72fbaee3f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -268,7 +268,6 @@ static void
 vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 {
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
-   unsigned i;
 
    assert(decoder);
 
@@ -286,9 +285,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
       vl_idct_cleanup(&dec->idct_c);
    }
 
-   for (i = 0; i < VL_MAX_PLANES; ++i)
-      dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_eb[i]);
-
+   dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv);
 
    pipe_resource_reference(&dec->quads.buffer, NULL);
@@ -457,7 +454,6 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
    struct pipe_vertex_buffer vb[3];
 
-   unsigned num_instances;
    unsigned i, j;
 
    assert(buf);
@@ -470,8 +466,6 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
    surfaces = dst->get_surfaces(dst);
 
-   num_instances = vl_vb_restart(&buf->vertex_stream);
-
    vb[0] = dec->quads;
    vb[1] = dec->pos;
 
@@ -489,11 +483,13 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
       }
    }
 
-   vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream);
-   dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
-
+   dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_eb[i]);
+      unsigned num_instances = vl_vb_restart(&buf->vertex_stream, i);
+
+      vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
+      dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
+
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
          vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_instances);
 
@@ -507,10 +503,12 @@ static void
 vl_mpeg12_decoder_clear_buffer(struct pipe_video_decode_buffer *buffer)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
+   unsigned i;
 
    assert(buf);
 
-   vl_vb_restart(&buf->vertex_stream);
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      vl_vb_restart(&buf->vertex_stream, i);
 }
 
 static bool
@@ -673,7 +671,6 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
 {
    struct vl_mpeg12_decoder *dec;
    float mc_scale;
-   unsigned i;
 
    assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
 
@@ -706,9 +703,7 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
       dec->base.height / MACROBLOCK_HEIGHT
    );
 
-   for (i = 0; i < VL_MAX_PLANES; ++i)
-      dec->ves_eb[i] = vl_vb_get_ves_eb(dec->pipe, i);
-
+   dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->pipe);
    dec->ves_mv = vl_vb_get_ves_mv(dec->pipe);
 
    /* TODO: Implement 422, 444 */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 6a6fabdb188..c961e433b51 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -54,7 +54,7 @@ struct vl_mpeg12_decoder
    struct pipe_vertex_buffer quads;
    struct pipe_vertex_buffer pos;
 
-   void *ves_eb[VL_MAX_PLANES];
+   void *ves_ycbcr;
    void *ves_mv;
 
    struct vl_idct idct_y, idct_c;
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 76142559cc8..89815c49e68 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -30,13 +30,13 @@
 #include "vl_vertex_buffers.h"
 #include "vl_types.h"
 
-struct vl_vertex_stream
+struct vl_ycbcr_vertex_stream
 {
    struct vertex2s pos;
    uint8_t mb_type_intra;
    uint8_t dct_type_field;
    uint8_t dummy[2];
-   uint8_t eb[3][2][2];
+   uint8_t eb[2][2];
 };
 
 struct vl_mv_vertex_stream
@@ -169,10 +169,7 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
    assert(elements && num_elements);
 
    for ( i = 0; i < num_elements; ++i ) {
-      if (elements[i].src_offset)
-         offset = elements[i].src_offset;
-      else
-         elements[i].src_offset = offset;
+      elements[i].src_offset = offset;
       elements[i].instance_divisor = 1;
       elements[i].vertex_buffer_index = vertex_buffer_index;
       offset += util_format_get_blocksize(elements[i].src_format);
@@ -180,7 +177,7 @@ vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements
 }
 
 void *
-vl_vb_get_ves_eb(struct pipe_context *pipe, int component)
+vl_vb_get_ves_ycbcr(struct pipe_context *pipe)
 {
    struct pipe_vertex_element vertex_elems[NUM_VS_INPUTS];
 
@@ -196,7 +193,6 @@ vl_vb_get_ves_eb(struct pipe_context *pipe, int component)
    vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
 
    /* empty block element of selected component */
-   vertex_elems[VS_I_EB].src_offset = offsetof(struct vl_vertex_stream, eb[component]);
    vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
 
    vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1);
@@ -233,21 +229,25 @@ vl_vb_get_ves_mv(struct pipe_context *pipe)
 void
 vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned width, unsigned height)
 {
-   unsigned i;
+   unsigned i, size;
 
    assert(buffer);
 
    buffer->width = width;
    buffer->height = height;
-   buffer->num_instances = 0;
 
-   buffer->resource = pipe_buffer_create
-   (
-      pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      PIPE_USAGE_STREAM,
-      sizeof(struct vl_vertex_stream) * width * height
-   );
+   size = width * height;
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      buffer->ycbcr[i].num_instances = 0;
+      buffer->ycbcr[i].resource = pipe_buffer_create
+      (
+         pipe->screen,
+         PIPE_BIND_VERTEX_BUFFER,
+         PIPE_USAGE_STREAM,
+         sizeof(struct vl_ycbcr_vertex_stream) * size
+      );
+   }
 
    for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
       buffer->mv[i].resource = pipe_buffer_create
@@ -255,7 +255,7 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned
          pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
          PIPE_USAGE_STREAM,
-         sizeof(struct vl_mv_vertex_stream) * width * height
+         sizeof(struct vl_mv_vertex_stream) * size
       );
    }
 
@@ -263,15 +263,15 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned
 }
 
 struct pipe_vertex_buffer
-vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer)
+vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component)
 {
    struct pipe_vertex_buffer buf;
 
    assert(buffer);
 
-   buf.stride = sizeof(struct vl_vertex_stream);
+   buf.stride = sizeof(struct vl_ycbcr_vertex_stream);
    buf.buffer_offset = 0;
-   buf.buffer = buffer->resource;
+   buf.buffer = buffer->ycbcr[component].resource;
 
    return buf;
 }
@@ -297,13 +297,15 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 
    assert(buffer && pipe);
 
-   buffer->buffer = pipe_buffer_map
-   (
-      pipe,
-      buffer->resource,
-      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-      &buffer->transfer
-   );
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      buffer->ycbcr[i].vertex_stream = pipe_buffer_map
+      (
+         pipe,
+         buffer->ycbcr[i].resource,
+         PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+         &buffer->ycbcr[i].transfer
+      );
+   }
 
    for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
       buffer->mv[i].vertex_stream = pipe_buffer_map
@@ -339,31 +341,45 @@ get_motion_vectors(enum pipe_mpeg12_motion_type mo_type, struct pipe_motionvecto
    dst[1].w = src->bottom.wheight;
 }
 
+static bool
+get_ycbcr_vectors(struct vl_ycbcr_vertex_stream *stream,
+                  struct pipe_mpeg12_macroblock *mb, const unsigned (*empty_block_mask)[2][2])
+{
+   bool completely_empty = true;
+   unsigned i, j;
+
+   stream->pos.x = mb->mbx;
+   stream->pos.y = mb->mby;
+   stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
+   stream->mb_type_intra = mb->dct_intra;
+
+   for ( i = 0; i < 2; ++i)
+      for ( j = 0; j < 2; ++j) {
+         bool empty = !(mb->cbp & (*empty_block_mask)[i][j]);
+         stream->eb[i][j] = empty;
+         completely_empty &= empty;
+      }
+
+   return !completely_empty;
+}
+
 void
 vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb,
                 const unsigned (*empty_block_mask)[3][2][2])
 {
-   unsigned i, j, k;
-   unsigned mv_pos;
+   unsigned i, mv_pos;
 
    assert(buffer);
    assert(mb);
-   assert(buffer->num_instances < buffer->width * buffer->height);
 
    if(mb->cbp) {
-      struct vl_vertex_stream *stream;
-      stream = buffer->buffer + buffer->num_instances++;
-
-      stream->pos.x = mb->mbx;
-      stream->pos.y = mb->mby;
-
-      for ( i = 0; i < 3; ++i)
-         for ( j = 0; j < 2; ++j)
-            for ( k = 0; k < 2; ++k)
-               stream->eb[i][j][k] = !(mb->cbp & (*empty_block_mask)[i][j][k]);
-
-      stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
-      stream->mb_type_intra = mb->dct_intra;
+      for (i = 0; i < VL_MAX_PLANES; ++i) {
+         assert(buffer->ycbcr[i].num_instances < buffer->width * buffer->height);
+         if (get_ycbcr_vectors(buffer->ycbcr[i].vertex_stream, mb, &(*empty_block_mask)[i])) {
+            buffer->ycbcr[i].vertex_stream++;
+            buffer->ycbcr[i].num_instances++;
+         }
+      }
    }
 
    mv_pos = mb->mbx + mb->mby * buffer->width;
@@ -378,21 +394,24 @@ vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 
    assert(buffer && pipe);
 
-   pipe_buffer_unmap(pipe, buffer->transfer);
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      pipe_buffer_unmap(pipe, buffer->ycbcr[i].transfer);
+   }
+
    for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
       pipe_buffer_unmap(pipe, buffer->mv[i].transfer);
    }
 }
 
 unsigned
-vl_vb_restart(struct vl_vertex_buffer *buffer)
+vl_vb_restart(struct vl_vertex_buffer *buffer, int component)
 {
    unsigned num_instances;
 
    assert(buffer);
 
-   num_instances = buffer->num_instances;
-   buffer->num_instances = 0;
+   num_instances = buffer->ycbcr[component].num_instances;
+   buffer->ycbcr[component].num_instances = 0;
    return num_instances;
 }
 
@@ -403,7 +422,9 @@ vl_vb_cleanup(struct vl_vertex_buffer *buffer)
 
    assert(buffer);
 
-   pipe_resource_reference(&buffer->resource, NULL);
+   for (i = 0; i < VL_MAX_PLANES; ++i) { /* ycbcr[] has VL_MAX_PLANES entries */
+      pipe_resource_reference(&buffer->ycbcr[i].resource, NULL);
+   }
 
    for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
       pipe_resource_reference(&buffer->mv[i].resource, NULL);
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index ce169ccf640..3d9c6141c5c 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -56,11 +56,13 @@ enum VS_INPUT
 struct vl_vertex_buffer
 {
    unsigned width, height;
-   unsigned num_instances;
 
-   struct pipe_resource *resource;
-   struct pipe_transfer *transfer;
-   struct vl_vertex_stream *buffer;
+   struct {
+      unsigned                      num_instances;
+      struct pipe_resource          *resource;
+      struct pipe_transfer          *transfer;
+      struct vl_ycbcr_vertex_stream *vertex_stream;
+   } ycbcr[VL_MAX_PLANES];
 
    struct {
       struct pipe_resource       *resource;
@@ -74,7 +76,7 @@ struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe,
 
 struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height);
 
-void *vl_vb_get_ves_eb(struct pipe_context *pipe, int component);
+void *vl_vb_get_ves_ycbcr(struct pipe_context *pipe);
 
 void *vl_vb_get_ves_mv(struct pipe_context *pipe);
 
@@ -82,7 +84,7 @@ void vl_vb_init(struct vl_vertex_buffer *buffer,
                 struct pipe_context *pipe,
                 unsigned width, unsigned height);
 
-struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer);
+struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
 
 struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector);
 
@@ -93,7 +95,7 @@ void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macrobl
 
 void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
-unsigned vl_vb_restart(struct vl_vertex_buffer *buffer);
+unsigned vl_vb_restart(struct vl_vertex_buffer *buffer, int component);
 
 void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
 
-- 
cgit v1.2.3


From 3511780a43077d1359bd491eadb4ab9b3b86795a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 19 Apr 2011 21:06:59 +0200
Subject: [g3dvl] revert commit 310eea52ca1e997295c84163066cc5d0fd4f8cf6

Using a separate vertex buffer for mc and ycbcr handling is still better.
---
 src/gallium/auxiliary/vl/vl_idct.c           |  93 ++-----
 src/gallium/auxiliary/vl/vl_idct.h           |   2 -
 src/gallium/auxiliary/vl/vl_mc.c             | 358 +++++++++++++--------------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c |  27 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c | 110 +++-----
 src/gallium/auxiliary/vl/vl_vertex_buffers.h |  17 +-
 6 files changed, 252 insertions(+), 355 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 4a500a7489c..ca3b1cb53ab 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -90,11 +90,10 @@ static void *
 create_vert_shader(struct vl_idct *idct, bool matrix_stage)
 {
    struct ureg_program *shader;
-   struct ureg_src vrect, vpos, vblock, eb;
-   struct ureg_src scale, blocks_xy;
+   struct ureg_src vrect, vpos;
+   struct ureg_src scale;
    struct ureg_dst t_tex, t_start;
    struct ureg_dst o_vpos, o_l_addr[2], o_r_addr[2];
-   unsigned label;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
@@ -105,12 +104,9 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
-   vblock = ureg_swizzle(vrect, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
 
-   eb = ureg_DECL_vs_input(shader, VS_I_EB);
-
    o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
    o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
 
@@ -119,75 +115,39 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
 
    /*
     * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (dst.width, dst.height)
-    * blocks_xy = (blocks_x, blocks_y)
-    *
-    * if eb.(vblock.y, vblock.x)
-    *    o_vpos.xy = -1
-    * else
-    *    t_tex = vpos * blocks_xy + vblock
-    *    t_start = t_tex * scale
-    *    t_tex = t_tex + vrect
-    *    o_vpos.xy = t_tex * scale
     *
-    *    o_l_addr = calc_addr(...)
-    *    o_r_addr = calc_addr(...)
-    * endif
+    * t_vpos = vpos + vrect
+    * o_vpos.xy = t_vpos * scale
     * o_vpos.zw = vpos
     *
+    * o_l_addr = calc_addr(...)
+    * o_r_addr = calc_addr(...)
+    *
     */
 
    scale = ureg_imm2f(shader,
       (float)BLOCK_WIDTH / idct->buffer_width,
       (float)BLOCK_HEIGHT / idct->buffer_height);
 
-   blocks_xy = ureg_imm2f(shader, idct->blocks_x, idct->blocks_y);
+   ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
+   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
+      ureg_scalar(vrect, TGSI_SWIZZLE_X),
+      ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets));
 
-   if (idct->blocks_x > 1 || idct->blocks_y > 1) {
-      ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY),
-         ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_Y)),
-         ureg_swizzle(eb, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W),
-         ureg_swizzle(eb, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
 
-      ureg_CMP(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_X),
-         ureg_negate(ureg_scalar(vblock, TGSI_SWIZZLE_X)),
-         ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_Y),
-         ureg_scalar(ureg_src(t_tex), TGSI_SWIZZLE_X));
+   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
 
-      eb = ureg_src(t_tex);
+   if(matrix_stage) {
+      calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
+      calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
+   } else {
+      calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
+      calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
    }
 
-   ureg_IF(shader, ureg_scalar(eb, TGSI_SWIZZLE_X), &label);
-
-      ureg_MOV(shader, o_vpos, ureg_imm1f(shader, -1.0f));
-
-   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-   ureg_ELSE(shader, &label);
-
-      ureg_MAD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, blocks_xy, vblock);
-      ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
-
-      ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), vrect);
-
-      ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
-      ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
-         ureg_scalar(vrect, TGSI_SWIZZLE_X),
-         ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets));
-
-      ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
-
-      if(matrix_stage) {
-         calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
-         calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
-      } else {
-         calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
-         calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
-      }
-
-   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-   ureg_ENDIF(shader);
-
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
-
    ureg_release_temporary(shader, t_tex);
    ureg_release_temporary(shader, t_start);
 
@@ -607,7 +567,6 @@ error_matrix:
 
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
-                  unsigned blocks_x, unsigned blocks_y,
                   unsigned nr_of_render_targets,
                   struct pipe_sampler_view *matrix,
                   struct pipe_sampler_view *transpose)
@@ -617,8 +576,6 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
    idct->pipe = pipe;
    idct->buffer_width = buffer_width;
    idct->buffer_height = buffer_height;
-   idct->blocks_x = blocks_x;
-   idct->blocks_y = blocks_y;
    idct->nr_of_render_targets = nr_of_render_targets;
 
    pipe_sampler_view_reference(&idct->matrix, matrix);
@@ -706,14 +663,10 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 void
 vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
 {
-   unsigned num_verts;
-
    assert(idct);
    assert(buffer);
 
    if(num_instances > 0) {
-      num_verts = idct->blocks_x * idct->blocks_y * 4;
-
       idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
       idct->pipe->bind_blend_state(idct->pipe, idct->blend);
       idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
@@ -724,7 +677,7 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
       idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
-      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
+      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
 
       /* second stage */
       idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]);
@@ -732,6 +685,6 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
       idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
       idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
       idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
-      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, num_verts, 0, num_instances);
+      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
    }
 }
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 7f00e3e28b4..96933b9d889 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -39,7 +39,6 @@ struct vl_idct
 
    unsigned buffer_width;
    unsigned buffer_height;
-   unsigned blocks_x, blocks_y;
    unsigned nr_of_render_targets;
 
    void *rs_state;
@@ -77,7 +76,6 @@ struct pipe_sampler_view *vl_idct_upload_matrix(struct pipe_context *pipe, float
 /* init an idct instance */
 bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   unsigned buffer_width, unsigned buffer_height,
-                  unsigned blocks_x, unsigned blocks_y,
                   unsigned nr_of_render_targets,
                   struct pipe_sampler_view *matrix,
                   struct pipe_sampler_view *transpose);
diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index 2624d0502c1..9785327cdbe 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -41,15 +41,13 @@
 enum VS_OUTPUT
 {
    VS_O_VPOS,
-   VS_O_LINE,
    VS_O_VTOP,
    VS_O_VBOTTOM
 };
 
 static struct ureg_dst
-calc_position(struct vl_mc *r, struct ureg_program *shader)
+calc_position(struct vl_mc *r, struct ureg_program *shader, struct ureg_src block_scale)
 {
-   struct ureg_src block_scale;
    struct ureg_src vrect, vpos;
    struct ureg_dst t_vpos;
    struct ureg_dst o_vpos;
@@ -68,111 +66,32 @@ calc_position(struct vl_mc *r, struct ureg_program *shader)
     * o_vpos.xy = t_vpos
     * o_vpos.zw = vpos
     */
-   block_scale = ureg_imm2f(shader,
-      (float)MACROBLOCK_WIDTH / r->buffer_width,
-      (float)MACROBLOCK_HEIGHT / r->buffer_height);
-
    ureg_ADD(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), vpos, vrect);
    ureg_MUL(shader, ureg_writemask(t_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos), block_scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), vpos);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
 
    return t_vpos;
 }
 
-static void *
-create_ycbcr_vert_shader(struct vl_mc *r)
+static struct ureg_dst
+calc_line(struct ureg_program *shader)
 {
-   struct ureg_program *shader;
-   struct ureg_src block_scale;
-   struct ureg_src vrect, vpos, eb, flags;
-   struct ureg_dst t_vpos, t_vtex;
-   struct ureg_dst o_line, o_vtex[2];
-   unsigned label;
-
-   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
-   if (!shader)
-      return NULL;
-
-   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
-   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
-   eb = ureg_DECL_vs_input(shader, VS_I_EB);
-   flags = ureg_DECL_vs_input(shader, VS_I_FLAGS);
+   struct ureg_dst tmp;
+   struct ureg_src pos;
 
-   t_vpos = calc_position(r, shader);
-   t_vtex = ureg_DECL_temporary(shader);
+   tmp = ureg_DECL_temporary(shader);
 
-   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
-   o_vtex[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
-   o_vtex[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
+   pos = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS, TGSI_INTERPOLATE_LINEAR);
 
    /*
-    * block_scale = (MACROBLOCK_WIDTH, MACROBLOCK_HEIGHT) / (dst.width, dst.height)
-    *
-    * o_line.x = interlaced
-    * o_line.y = vrect
-    *
-    * o_vtex[0].z = vrect.x ? eb.y : eb.x
-    * o_vtex[1].z = vrect.x ? eb.w : eb.z
-    *
-    * if(interlaced) {
-    *    t_vtex.x = vrect.x
-    *    t_vtex.y = vrect.y * 0.5
-    *    t_vtex += vpos
-    *
-    *    o_vtex[0].xy = t_vtex * block_scale
-    *
-    *    t_vtex.y += 0.5
-    *    o_vtex[1].xy = t_vtex * block_scale
-    * } else {
-    *    o_vtex[0..1].xy = t_vpos
-    * }
-    * o_vtex[2].xy = t_vpos
-    *
+    * tmp.y = fraction(pos.y / 2) >= 0.5 ? 1 : 0
     */
-   block_scale = ureg_imm2f(shader,
-      (float)MACROBLOCK_WIDTH / r->buffer_width,
-      (float)MACROBLOCK_HEIGHT / r->buffer_height);
-
-   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_X), flags, ureg_imm1f(shader, 0.5f));
-   ureg_MOV(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y), vrect);
-
-   ureg_MOV(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   ureg_CMP(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_Z),
-            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
-            ureg_scalar(eb, TGSI_SWIZZLE_Y),
-            ureg_scalar(eb, TGSI_SWIZZLE_X));
-
-   ureg_MOV(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   ureg_CMP(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_Z),
-            ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_X)),
-            ureg_scalar(eb, TGSI_SWIZZLE_W),
-            ureg_scalar(eb, TGSI_SWIZZLE_Z));
-
-   if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO
-      ureg_IF(shader, ureg_scalar(flags, TGSI_SWIZZLE_Y), &label);
-
-         ureg_MOV(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_X), vrect);
-         ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), vrect, ureg_imm1f(shader, 0.5f));
-         ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY), vpos, ureg_src(t_vtex));
-         ureg_MUL(shader, ureg_writemask(o_vtex[0], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
-         ureg_ADD(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y), ureg_src(t_vtex), ureg_imm1f(shader, 0.5f));
-         ureg_MUL(shader, ureg_writemask(o_vtex[1], TGSI_WRITEMASK_XY), ureg_src(t_vtex), block_scale);
-
-         ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
-            ureg_scalar(vrect, TGSI_SWIZZLE_Y),
-            ureg_imm1f(shader, MACROBLOCK_HEIGHT / 2));
-
-      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-      ureg_ENDIF(shader);
-   }
-
-   ureg_release_temporary(shader, t_vtex);
-   ureg_release_temporary(shader, t_vpos);
-
-   ureg_END(shader);
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), pos, ureg_imm1f(shader, 0.5f));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp));
+   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
 
-   return ureg_create_shader_and_destroy(shader, r->pipe);
+   return tmp;
 }
 
 static void *
@@ -182,7 +101,7 @@ create_ref_vert_shader(struct vl_mc *r)
    struct ureg_src mv_scale;
    struct ureg_src vrect, vmv[2];
    struct ureg_dst t_vpos;
-   struct ureg_dst o_vpos, o_line, o_vmv[2];
+   struct ureg_dst o_vpos, o_vmv[2];
    unsigned i;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
@@ -190,31 +109,29 @@ create_ref_vert_shader(struct vl_mc *r)
       return NULL;
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
-   ureg_DECL_vs_input(shader, VS_I_EB);
-   ureg_DECL_vs_input(shader, VS_I_FLAGS);
    vmv[0] = ureg_DECL_vs_input(shader, VS_I_MV_TOP);
    vmv[1] = ureg_DECL_vs_input(shader, VS_I_MV_BOTTOM);
 
-   t_vpos = calc_position(r, shader);
+   t_vpos = calc_position(r, shader, ureg_imm2f(shader,
+      (float)MACROBLOCK_WIDTH / r->buffer_width,
+      (float)MACROBLOCK_HEIGHT / r->buffer_height)
+   );
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
-   o_line = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE);
    o_vmv[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
    o_vmv[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM);
 
    /*
-    * mv_scale = 0.5 / (dst.width, dst.height);
+    * mv_scale.xy = 0.5 / (dst.width, dst.height);
+    * mv_scale.z = 1.0f / 4.0f
+    * mv_scale.w = 1.0f / 255.0f
     *
     * // Apply motion vectors
-    * o_vmv[0..3] = t_vpos + vmv[0..3] * mv_scale
-    *
-    * o_line.y = vrect
+    * o_vmv[0..1].xy = vmv[0..1] * mv_scale + t_vpos
+    * o_vmv[0..1].zw = vmv[0..1] * mv_scale
     *
     */
 
-   ureg_MUL(shader, ureg_writemask(o_line, TGSI_WRITEMASK_Y),
-      vrect, ureg_imm1f(shader, r->macroblock_size / 2));
-
    mv_scale = ureg_imm4f(shader,
       0.5f / r->buffer_width,
       0.5f / r->buffer_height,
@@ -233,90 +150,6 @@ create_ref_vert_shader(struct vl_mc *r)
    return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-static struct ureg_dst
-calc_field(struct ureg_program *shader)
-{
-   struct ureg_dst tmp;
-   struct ureg_src line;
-
-   tmp = ureg_DECL_temporary(shader);
-
-   line = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_LINE, TGSI_INTERPOLATE_LINEAR);
-
-   /*
-    * line.x is flag for intra frames
-    * line.y going from 0 to 1 if not interlaced
-    * line.y going from 0 to 8 in steps of 0.5 if interlaced
-    *
-    * tmp.xy = fraction(line)
-    * tmp.xy = tmp.xy >= 0.5 ? 1 : 0
-    */
-   ureg_MOV(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), line);
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), line);
-   ureg_SGE(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(tmp), ureg_imm1f(shader, 0.5f));
-
-   return tmp;
-}
-
-static void *
-create_ycbcr_frag_shader(struct vl_mc *r, float scale)
-{
-   struct ureg_program *shader;
-   struct ureg_src tc[2], sampler;
-   struct ureg_dst texel, t_tc, field;
-   struct ureg_dst fragment;
-   unsigned label;
-
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
-
-   tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
-   tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VBOTTOM, TGSI_INTERPOLATE_LINEAR);
-
-   sampler = ureg_DECL_sampler(shader, 0);
-
-   t_tc = ureg_DECL_temporary(shader);
-   texel = ureg_DECL_temporary(shader);
-
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
-   field = calc_field(shader);
-
-   /*
-    * texel.y  = tex(field.y ? tc[1] : tc[0], sampler[0])
-    * texel.cb = tex(tc[2], sampler[1])
-    * texel.cr = tex(tc[2], sampler[2])
-    */
-
-   ureg_CMP(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_XYZ),
-            ureg_negate(ureg_scalar(ureg_src(field), TGSI_SWIZZLE_Y)),
-            tc[1], tc[0]);
-
-   ureg_SLT(shader, ureg_writemask(t_tc, TGSI_WRITEMASK_Z), ureg_src(t_tc), ureg_imm1f(shader, 0.5f));
-
-   ureg_MOV(shader, fragment, ureg_imm4f(shader, 0.0f, 0.0f, 0.0f, 1.0f));
-   ureg_IF(shader, ureg_scalar(ureg_src(t_tc), TGSI_SWIZZLE_Z), &label);
-
-      ureg_TEX(shader, texel, TGSI_TEXTURE_3D, ureg_src(t_tc), sampler);
-
-      if (scale != 1.0f)
-         ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
-                  ureg_src(texel), ureg_imm1f(shader, scale),
-                  ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X));
-      else
-         ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
-                  ureg_src(texel), ureg_scalar(ureg_src(field), TGSI_SWIZZLE_X));
-
-   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
-   ureg_ENDIF(shader);
-
-   ureg_release_temporary(shader, t_tc);
-   ureg_release_temporary(shader, texel);
-
-   return ureg_create_shader_and_destroy(shader, r->pipe);
-}
-
 static void *
 create_ref_frag_shader(struct vl_mc *r)
 {
@@ -342,7 +175,7 @@ create_ref_frag_shader(struct vl_mc *r)
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-   field = calc_field(shader);
+   field = calc_line(shader);
 
    /*
     * ref = field.z ? tc[1] : tc[0]
@@ -386,6 +219,149 @@ create_ref_frag_shader(struct vl_mc *r)
    return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
+static void *
+create_ycbcr_vert_shader(struct vl_mc *r)
+{
+   struct ureg_program *shader;
+   /* Builds the vertex shader of the ycbcr stage; returns NULL early when ureg_create() fails. */
+   struct ureg_src vrect, vpos;
+   struct ureg_dst t_vpos, t_vtex;
+   struct ureg_dst o_vpos, o_vtex;
+
+   struct vertex2f scale = {
+      (float)BLOCK_WIDTH / r->buffer_width * MACROBLOCK_WIDTH / r->macroblock_size,
+      (float)BLOCK_HEIGHT / r->buffer_height * MACROBLOCK_HEIGHT / r->macroblock_size
+   };
+
+   unsigned label;
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_vpos = calc_position(r, shader, ureg_imm2f(shader, scale.x, scale.y));
+   t_vtex = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
+
+   /*
+    * Per-vertex outputs computed below:
+    *
+    * o_vtex.xy = t_vpos
+    * o_vtex.z = vpos.z * 0.5   (vpos.z: intra byte of the ycbcr stream layout)
+    * o_vtex.w = -1
+    * if (vpos.w) {             (vpos.w: field byte of the ycbcr stream layout)
+    *    t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y, 0 }
+    *    t_vtex.z = frac(vpos.y * 0.5)
+    *    t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y
+    *    o_vpos.y = t_vtex.y + t_vpos.y
+    *    o_vtex.w = t_vtex.z ? 0 : 1
+    * }
+    */
+   ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
+   ureg_MUL(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_Z),
+            ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f));
+   ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f));
+
+   if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO
+      ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label);
+
+         ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_XY),
+                  ureg_negate(ureg_scalar(vrect, TGSI_SWIZZLE_Y)),
+                  ureg_imm2f(shader, 0.0f, scale.y),
+                  ureg_imm2f(shader, -scale.y, 0.0f));
+         ureg_MUL(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z),
+                  ureg_scalar(vpos, TGSI_SWIZZLE_Y), ureg_imm1f(shader, 0.5f));
+
+         ureg_FRC(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Z), ureg_src(t_vtex));
+
+         ureg_CMP(shader, ureg_writemask(t_vtex, TGSI_WRITEMASK_Y),
+                  ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
+                  ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_X),
+                  ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Y));
+         ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y),
+                  ureg_src(t_vpos), ureg_src(t_vtex));
+
+         ureg_CMP(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W),
+                  ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
+                  ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f));
+
+      ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+      ureg_ENDIF(shader);
+   }
+
+   ureg_release_temporary(shader, t_vtex);
+   ureg_release_temporary(shader, t_vpos);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
+static void *
+create_ycbcr_frag_shader(struct vl_mc *r, float scale)
+{
+   struct ureg_program *shader;
+   struct ureg_src tc, sampler;
+   struct ureg_dst tmp;
+   struct ureg_dst fragment;
+   unsigned label;
+   /* Builds the ycbcr fragment shader; `scale` multiplies the sampled texel. NULL if ureg_create() fails. */
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
+
+   sampler = ureg_DECL_sampler(shader, 0);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   tmp = calc_line(shader);
+
+   /*
+    * if (field == tc.w)
+    *    kill();
+    * else {
+    *    fragment.xyz  = tex(tc, sampler) * scale + tc.z
+    *    fragment.w = 1.0f
+    * }
+    */
+
+   ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
+            ureg_scalar(tc, TGSI_SWIZZLE_W), ureg_src(tmp));
+
+   ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
+
+      ureg_KILP(shader);
+
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ELSE(shader, &label);
+
+      ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc, sampler);
+
+      if (scale != 1.0f)
+         ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+                  ureg_src(tmp), ureg_imm1f(shader, scale),
+                  ureg_scalar(tc, TGSI_SWIZZLE_Z));
+      else
+         ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+                  ureg_src(tmp), ureg_scalar(tc, TGSI_SWIZZLE_Z));
+
+      ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
+
+   ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
+   ureg_ENDIF(shader);
+
+   ureg_release_temporary(shader, tmp);
+   ureg_END(shader); /* terminate the program with END, as create_ycbcr_vert_shader does */
+   return ureg_create_shader_and_destroy(shader, r->pipe);
+}
+
 static bool
 init_pipe_state(struct vl_mc *r)
 {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 7e72fbaee3f..182294894c1 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -108,7 +108,9 @@ map_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
 }
 
 static void
-upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane, unsigned x, unsigned y, short *block)
+upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane,
+             unsigned x, unsigned y, short *block,
+             bool intra, enum pipe_mpeg12_dct_type type)
 {
    unsigned tex_pitch;
    short *texels;
@@ -118,6 +120,8 @@ upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane, unsigned x, unsign
    assert(buffer);
    assert(block);
 
+   vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type);
+
    tex_pitch = buffer->tex_transfer[plane]->stride / sizeof(short);
    texels = buffer->texels[plane] + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
 
@@ -142,7 +146,8 @@ upload_buffer(struct vl_mpeg12_decoder *ctx,
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
          if (mb->cbp & (*ctx->empty_block_mask)[0][y][x]) {
-            upload_block(buffer, 0, mb->mbx * 2 + x, mb->mby * 2 + y, blocks);
+            upload_block(buffer, 0, mb->mbx * 2 + x, mb->mby * 2 + y, blocks,
+                         mb->dct_intra, mb->dct_type);
             blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
          }
       }
@@ -153,7 +158,8 @@ upload_buffer(struct vl_mpeg12_decoder *ctx,
 
    for (tb = 1; tb < 3; ++tb) {
       if (mb->cbp & (*ctx->empty_block_mask)[tb][0][0]) {
-         upload_block(buffer, tb, mb->mbx, mb->mby, blocks);
+         upload_block(buffer, tb, mb->mbx, mb->mby, blocks,
+                      mb->dct_intra, mb->dct_type);
          blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
       }
    }
@@ -245,7 +251,7 @@ vl_mpeg12_buffer_add_macroblocks(struct pipe_video_decode_buffer *buffer,
    assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
 
    for ( i = 0; i < num_macroblocks; ++i ) {
-      vl_vb_add_block(&buf->vertex_stream, &mb[i], dec->empty_block_mask);
+      vl_vb_add_block(&buf->vertex_stream, &mb[i]);
       upload_buffer(dec, buf, &mb[i]);
    }
 }
@@ -565,7 +571,7 @@ find_first_supported_format(struct vl_mpeg12_decoder *dec,
 static bool
 init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_height)
 {
-   unsigned chroma_width, chroma_height, chroma_blocks_x, chroma_blocks_y;
+   unsigned chroma_width, chroma_height;
    struct pipe_sampler_view *matrix, *transpose;
    float matrix_scale, transpose_scale;
 
@@ -619,28 +625,21 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
       pipe_sampler_view_reference(&transpose, matrix);
 
    if (!vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height,
-                     2, 2, dec->nr_of_idct_render_targets, matrix, transpose))
+                     dec->nr_of_idct_render_targets, matrix, transpose))
       goto error_y;
 
    if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
       chroma_width = buffer_width / 2;
       chroma_height = buffer_height / 2;
-      chroma_blocks_x = 1;
-      chroma_blocks_y = 1;
    } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
       chroma_width = buffer_width;
       chroma_height = buffer_height / 2;
-      chroma_blocks_x = 2;
-      chroma_blocks_y = 1;
    } else {
       chroma_width = buffer_width;
       chroma_height = buffer_height;
-      chroma_blocks_x = 2;
-      chroma_blocks_y = 2;
    }
 
    if(!vl_idct_init(&dec->idct_c, dec->pipe, chroma_width, chroma_height,
-                    chroma_blocks_x, chroma_blocks_y,
                     dec->nr_of_idct_render_targets, matrix, transpose))
       goto error_c;
 
@@ -696,7 +695,7 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
 
    dec->pipe = pipe;
 
-   dec->quads = vl_vb_upload_quads(dec->pipe, 2, 2);
+   dec->quads = vl_vb_upload_quads(dec->pipe);
    dec->pos = vl_vb_upload_pos(
       dec->pipe,
       dec->base.width / MACROBLOCK_WIDTH,
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 89815c49e68..212ace7512a 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -32,11 +32,10 @@
 
 struct vl_ycbcr_vertex_stream
 {
-   struct vertex2s pos;
-   uint8_t mb_type_intra;
-   uint8_t dct_type_field;
-   uint8_t dummy[2];
-   uint8_t eb[2][2];
+   uint8_t x;
+   uint8_t y;
+   uint8_t intra;
+   uint8_t field;
 };
 
 struct vl_mv_vertex_stream
@@ -50,25 +49,25 @@ static const struct vertex2f block_quad[4] = {
 };
 
 struct pipe_vertex_buffer
-vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks_y)
+vl_vb_upload_quads(struct pipe_context *pipe)
 {
    struct pipe_vertex_buffer quad;
    struct pipe_transfer *buf_transfer;
-   struct vertex4f *v;
+   struct vertex2f *v;
 
-   unsigned x, y, i;
+   unsigned i;
 
    assert(pipe);
 
    /* create buffer */
-   quad.stride = sizeof(struct vertex4f);
+   quad.stride = sizeof(struct vertex2f);
    quad.buffer_offset = 0;
    quad.buffer = pipe_buffer_create
    (
       pipe->screen,
       PIPE_BIND_VERTEX_BUFFER,
       PIPE_USAGE_STATIC,
-      sizeof(struct vertex4f) * 4 * blocks_x * blocks_y
+      sizeof(struct vertex2f) * 4
    );
 
    if(!quad.buffer)
@@ -83,16 +82,9 @@ vl_vb_upload_quads(struct pipe_context *pipe, unsigned blocks_x, unsigned blocks
       &buf_transfer
    );
 
-   for ( y = 0; y < blocks_y; ++y) {
-      for ( x = 0; x < blocks_x; ++x) {
-         for (i = 0; i < 4; ++i, ++v) {
-            v->x = block_quad[i].x;
-            v->y = block_quad[i].y;
-
-            v->z = x;
-            v->w = y;
-         }
-      }
+   for (i = 0; i < 4; ++i, ++v) {
+      v->x = block_quad[i].x;
+      v->y = block_quad[i].y;
    }
 
    pipe_buffer_unmap(pipe, buf_transfer);
@@ -155,14 +147,14 @@ vl_vb_get_quad_vertex_element(void)
    element.src_offset = 0;
    element.instance_divisor = 0;
    element.vertex_buffer_index = 0;
-   element.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+   element.src_format = PIPE_FORMAT_R32G32_FLOAT;
 
    return element;
 }
 
 static void
 vl_vb_element_helper(struct pipe_vertex_element* elements, unsigned num_elements,
-                              unsigned vertex_buffer_index)
+                     unsigned vertex_buffer_index)
 {
    unsigned i, offset = 0;
 
@@ -187,17 +179,11 @@ vl_vb_get_ves_ycbcr(struct pipe_context *pipe)
    vertex_elems[VS_I_RECT] = vl_vb_get_quad_vertex_element();
 
    /* Position element */
-   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R16G16_SSCALED;
+   vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
 
-   /* flags */
-   vertex_elems[VS_I_FLAGS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
-
-   /* empty block element of selected component */
-   vertex_elems[VS_I_EB].src_format = PIPE_FORMAT_R8G8B8A8_USCALED;
-
-   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], NUM_VS_INPUTS - 1, 1);
+   vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1);
 
-   return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems);
+   return pipe->create_vertex_elements_state(pipe, 2, vertex_elems);
 }
 
 void *
@@ -227,7 +213,8 @@ vl_vb_get_ves_mv(struct pipe_context *pipe)
 }
 
 void
-vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned width, unsigned height)
+vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
+           unsigned width, unsigned height)
 {
    unsigned i, size;
 
@@ -245,7 +232,7 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe, unsigned
          pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
          PIPE_USAGE_STREAM,
-         sizeof(struct vl_ycbcr_vertex_stream) * size
+         sizeof(struct vl_ycbcr_vertex_stream) * size * 4
       );
    }
 
@@ -319,6 +306,24 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 
 }
 
+void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
+                     unsigned component, unsigned x, unsigned y,
+                     bool intra, enum pipe_mpeg12_dct_type type)
+{
+   struct vl_ycbcr_vertex_stream *stream;
+   /* Appends one entry for block (x, y) to the ycbcr vertex stream of plane `component`. */
+   assert(buffer);
+   assert(buffer->ycbcr[component].num_instances < buffer->width * buffer->height * 4);
+   /* take the current write slot and advance the stream pointer past it */
+   stream = buffer->ycbcr[component].vertex_stream++;
+   stream->x = x; /* x and y are stored in uint8_t fields, larger values get truncated */
+   stream->y = y;
+   stream->intra = intra;
+   stream->field = type == PIPE_MPEG12_DCT_TYPE_FIELD; /* 1 for field DCT, 0 otherwise */
+
+   buffer->ycbcr[component].num_instances++;
+}
+
 static void
 get_motion_vectors(enum pipe_mpeg12_motion_type mo_type, struct pipe_motionvector *src, struct vertex4s dst[2])
 {
@@ -341,47 +346,14 @@ get_motion_vectors(enum pipe_mpeg12_motion_type mo_type, struct pipe_motionvecto
    dst[1].w = src->bottom.wheight;
 }
 
-static bool
-get_ycbcr_vectors(struct vl_ycbcr_vertex_stream *stream,
-                  struct pipe_mpeg12_macroblock *mb, const unsigned (*empty_block_mask)[2][2])
-{
-   bool completely_empty = true;
-   unsigned i, j;
-
-   stream->pos.x = mb->mbx;
-   stream->pos.y = mb->mby;
-   stream->dct_type_field = mb->dct_type == PIPE_MPEG12_DCT_TYPE_FIELD;
-   stream->mb_type_intra = mb->dct_intra;
-
-   for ( i = 0; i < 2; ++i)
-      for ( j = 0; j < 2; ++j) {
-         bool empty = !(mb->cbp & (*empty_block_mask)[i][j]);
-         stream->eb[i][j] = empty;
-         completely_empty &= empty;
-      }
-
-   return !completely_empty;
-}
-
 void
-vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb,
-                const unsigned (*empty_block_mask)[3][2][2])
+vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb)
 {
-   unsigned i, mv_pos;
+   unsigned mv_pos;
 
    assert(buffer);
    assert(mb);
 
-   if(mb->cbp) {
-      for (i = 0; i < VL_MAX_PLANES; ++i) {
-         assert(buffer->ycbcr[i].num_instances < buffer->width * buffer->height);
-         if (get_ycbcr_vectors(buffer->ycbcr[i].vertex_stream, mb, &(*empty_block_mask)[i])) {
-            buffer->ycbcr[i].vertex_stream++;
-            buffer->ycbcr[i].num_instances++;
-         }
-      }
-   }
-
    mv_pos = mb->mbx + mb->mby * buffer->width;
    get_motion_vectors(mb->mo_type, &mb->mv[0], buffer->mv[0].vertex_stream[mv_pos].mv);
    get_motion_vectors(mb->mo_type, &mb->mv[1], buffer->mv[1].vertex_stream[mv_pos].mv);
@@ -422,7 +394,7 @@ vl_vb_cleanup(struct vl_vertex_buffer *buffer)
 
    assert(buffer);
 
-   for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
       pipe_resource_reference(&buffer->ycbcr[i].resource, NULL);
    }
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 3d9c6141c5c..5632eb297bd 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -44,11 +44,8 @@ enum VS_INPUT
    VS_I_RECT,
    VS_I_VPOS,
 
-   VS_I_FLAGS,
-   VS_I_EB,
-
-   VS_I_MV_TOP = VS_I_FLAGS,
-   VS_I_MV_BOTTOM = VS_I_EB,
+   VS_I_MV_TOP,
+   VS_I_MV_BOTTOM,
 
    NUM_VS_INPUTS
 };
@@ -71,8 +68,7 @@ struct vl_vertex_buffer
    } mv[VL_MAX_REF_FRAMES];
 };
 
-struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe,
-                                             unsigned blocks_x, unsigned blocks_y);
+struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe);
 
 struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height);
 
@@ -90,8 +86,11 @@ struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int moti
 
 void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
-void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb,
-                     const unsigned (*empty_block_mask)[3][2][2]);
+void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
+                     unsigned component, unsigned x, unsigned y,
+                     bool intra, enum pipe_mpeg12_dct_type type);
+
+void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb);
 
 void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
-- 
cgit v1.2.3


From b7acf83d523563cde613fe805bd8edaa02f64b53 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 20 Apr 2011 13:44:26 +0200
Subject: [g3dvl] make motion vector buffers a public interface

---
 src/gallium/auxiliary/vl/vl_mc.c                   |   2 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c       |  23 ++-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c       |  43 ++----
 src/gallium/auxiliary/vl/vl_vertex_buffers.h       |  18 ++-
 src/gallium/include/pipe/p_video_context.h         |  10 ++
 src/gallium/include/pipe/p_video_state.h           |  36 +++--
 src/gallium/state_trackers/xorg/xvmc/surface.c     | 166 +++++++++++++--------
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   7 +-
 8 files changed, 182 insertions(+), 123 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index 9785327cdbe..137a1beaa0d 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -136,7 +136,7 @@ create_ref_vert_shader(struct vl_mc *r)
       0.5f / r->buffer_width,
       0.5f / r->buffer_height,
       1.0f / 4.0f,
-      1.0f / 255.0f);
+      1.0f / PIPE_VIDEO_MV_WEIGHT_MAX);
 
    for (i = 0; i < 2; ++i) {
       ureg_MAD(shader, ureg_writemask(o_vmv[i], TGSI_WRITEMASK_XY), mv_scale, vmv[i], ureg_src(t_vpos));
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 182294894c1..f262c13e0fc 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -231,6 +231,26 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
    map_buffers(dec, buf);
 }
 
+static unsigned
+vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+
+   assert(buf);
+   /* get_mv_stream_stride hook of the decode buffer: forwards to the vertex buffer helper */
+   return vl_vb_get_mv_stream_stride(&buf->vertex_stream);
+}
+
+static struct pipe_motionvector *
+vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+
+   assert(buf);
+   /* get_mv_stream hook of the decode buffer: returns the motion vector stream of `ref_frame` */
+   return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame);
+}
+
 static void
 vl_mpeg12_buffer_add_macroblocks(struct pipe_video_decode_buffer *buffer,
                                  unsigned num_macroblocks,
@@ -251,7 +271,6 @@ vl_mpeg12_buffer_add_macroblocks(struct pipe_video_decode_buffer *buffer,
    assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
 
    for ( i = 0; i < num_macroblocks; ++i ) {
-      vl_vb_add_block(&buf->vertex_stream, &mb[i]);
       upload_buffer(dec, buf, &mb[i]);
    }
 }
@@ -389,6 +408,8 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    buffer->base.decoder = decoder;
    buffer->base.destroy = vl_mpeg12_buffer_destroy;
    buffer->base.map = vl_mpeg12_buffer_map;
+   buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
+   buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
    buffer->base.add_macroblocks = vl_mpeg12_buffer_add_macroblocks;
    buffer->base.unmap = vl_mpeg12_buffer_unmap;
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index 212ace7512a..e61425843ff 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -38,11 +38,6 @@ struct vl_ycbcr_vertex_stream
    uint8_t field;
 };
 
-struct vl_mv_vertex_stream
-{
-   struct vertex4s mv[2];
-};
-
 /* vertices for a quad covering a block */
 static const struct vertex2f block_quad[4] = {
    {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
@@ -242,7 +237,7 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
          pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
          PIPE_USAGE_STREAM,
-         sizeof(struct vl_mv_vertex_stream) * size
+         sizeof(struct pipe_motionvector) * size
       );
    }
 
@@ -270,7 +265,7 @@ vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector)
 
    assert(buffer);
 
-   buf.stride = sizeof(struct vl_mv_vertex_stream);
+   buf.stride = sizeof(struct pipe_motionvector);
    buf.buffer_offset = 0;
    buf.buffer = buffer->mv[motionvector].resource;
 
@@ -324,39 +319,21 @@ void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
    buffer->ycbcr[component].num_instances++;
 }
 
-static void
-get_motion_vectors(enum pipe_mpeg12_motion_type mo_type, struct pipe_motionvector *src, struct vertex4s dst[2])
+unsigned
+vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer)
 {
-   if (mo_type == PIPE_MPEG12_MOTION_TYPE_FRAME) {
-      dst[0].x = dst[1].x = src->top.x;
-      dst[0].y = dst[1].y = src->top.y;
-      dst[0].z = dst[1].z = 0;
-
-   } else {
-      dst[0].x = src->top.x;
-      dst[0].y = src->top.y;
-      dst[0].z = src->top.field_select ? 3 : 1;
-
-      dst[1].x = src->bottom.x;
-      dst[1].y = src->bottom.y;
-      dst[1].z = src->bottom.field_select ? 3 : 1;
-   }
+   assert(buffer);
 
-   dst[0].w = src->top.wheight;
-   dst[1].w = src->bottom.wheight;
+   return buffer->width;
 }
 
-void
-vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb)
+struct pipe_motionvector *
+vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame)
 {
-   unsigned mv_pos;
-
    assert(buffer);
-   assert(mb);
+   assert(ref_frame < VL_MAX_REF_FRAMES);
 
-   mv_pos = mb->mbx + mb->mby * buffer->width;
-   get_motion_vectors(mb->mo_type, &mb->mv[0], buffer->mv[0].vertex_stream[mv_pos].mv);
-   get_motion_vectors(mb->mo_type, &mb->mv[1], buffer->mv[1].vertex_stream[mv_pos].mv);
+   return buffer->mv[ref_frame].vertex_stream;
 }
 
 void
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 5632eb297bd..6a83111b4a8 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -62,9 +62,9 @@ struct vl_vertex_buffer
    } ycbcr[VL_MAX_PLANES];
 
    struct {
-      struct pipe_resource       *resource;
-      struct pipe_transfer       *transfer;
-      struct vl_mv_vertex_stream *vertex_stream;
+      struct pipe_resource     *resource;
+      struct pipe_transfer     *transfer;
+      struct pipe_motionvector *vertex_stream;
    } mv[VL_MAX_REF_FRAMES];
 };
 
@@ -80,17 +80,19 @@ void vl_vb_init(struct vl_vertex_buffer *buffer,
                 struct pipe_context *pipe,
                 unsigned width, unsigned height);
 
-struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
-
-struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int motionvector);
-
 void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
+struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
+
 void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
                      unsigned component, unsigned x, unsigned y,
                      bool intra, enum pipe_mpeg12_dct_type type);
 
-void vl_vb_add_block(struct vl_vertex_buffer *buffer, struct pipe_mpeg12_macroblock *mb);
+struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame);
+
+unsigned vl_vb_get_mv_stream_stride(struct vl_vertex_buffer *buffer);
+
+struct pipe_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, int ref_frame);
 
 void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 81fc2812249..1eb96420fb7 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -186,6 +186,16 @@ struct pipe_video_decode_buffer
     */
    void (*map)(struct pipe_video_decode_buffer *decbuf);
 
+   /**
+    * get the stride of the mv buffer
+    */
+   unsigned (*get_mv_stream_stride)(struct pipe_video_decode_buffer *decbuf);
+
+   /**
+    * get the pointer where to put the motion vectors of a ref frame
+    */
+   struct pipe_motionvector *(*get_mv_stream)(struct pipe_video_decode_buffer *decbuf, int ref_frame);
+
 #if 0
    /**
     * decode a bitstream
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 2cf5ea4d975..dcb64d3c220 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -50,31 +50,43 @@ enum pipe_mpeg12_picture_type
    PIPE_MPEG12_PICTURE_TYPE_FRAME
 };
 
-enum pipe_mpeg12_motion_type
-{
-   PIPE_MPEG12_MOTION_TYPE_FIELD,
-   PIPE_MPEG12_MOTION_TYPE_FRAME,
-   PIPE_MPEG12_MOTION_TYPE_DUALPRIME,
-   PIPE_MPEG12_MOTION_TYPE_16x8
-};
-
 enum pipe_mpeg12_dct_type
 {
    PIPE_MPEG12_DCT_TYPE_FIELD,
    PIPE_MPEG12_DCT_TYPE_FRAME
 };
 
+enum pipe_video_field_select
+{
+   PIPE_VIDEO_FRAME = 0,
+   PIPE_VIDEO_TOP_FIELD = 1,
+   PIPE_VIDEO_BOTTOM_FIELD = 3,
+
+   /* TODO
+   PIPE_VIDEO_DUALPRIME
+   PIPE_VIDEO_16x8
+   */
+};
+
+enum pipe_video_mv_weight
+{
+   PIPE_VIDEO_MV_WEIGHT_MIN = 0,
+   PIPE_VIDEO_MV_WEIGHT_HALF = 128,
+   PIPE_VIDEO_MV_WEIGHT_MAX = 256
+};
+
 struct pipe_macroblock
 {
    enum pipe_video_codec codec;
 };
 
+/* bitfields because this is used as a vertex buffer element */
 struct pipe_motionvector
 {
    struct {
-      signed x, y;
-      bool field_select;
-      unsigned wheight:8;
+      signed x:16, y:16;
+      enum pipe_video_field_select field_select:16;
+      enum pipe_video_mv_weight weight:16;
    } top, bottom;
 };
 
@@ -84,10 +96,8 @@ struct pipe_mpeg12_macroblock
 
    unsigned mbx;
    unsigned mby;
-   enum pipe_mpeg12_motion_type mo_type;
    bool dct_intra;
    enum pipe_mpeg12_dct_type dct_type;
-   struct pipe_motionvector mv[2];
    unsigned cbp;
    short *blocks;
 };
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 9752497eb30..567484e993d 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -60,30 +60,84 @@ static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
    return -1;
 }
 
-static enum pipe_mpeg12_motion_type MotionToPipe(int xvmc_motion_type, unsigned xvmc_picture_structure)
+static inline void
+MacroBlockTypeToPipeWeights(const XvMCMacroBlock *xvmc_mb, unsigned weights[2])
 {
-   switch (xvmc_motion_type) {
+   assert(xvmc_mb);
+
+   switch (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) {
+   case XVMC_MB_TYPE_MOTION_FORWARD:
+      weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX;
+      weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      break;
+
+   case (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD):
+      weights[0] = PIPE_VIDEO_MV_WEIGHT_HALF;
+      weights[1] = PIPE_VIDEO_MV_WEIGHT_HALF;
+      break;
+
+   case XVMC_MB_TYPE_MOTION_BACKWARD:
+      weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      weights[1] = PIPE_VIDEO_MV_WEIGHT_MAX;
+      break;
+
+   default:
+      /* workaround for xine's xxmc video out plugin */
+      if (!(xvmc_mb->macroblock_type & ~XVMC_MB_TYPE_PATTERN)) {
+         weights[0] = PIPE_VIDEO_MV_WEIGHT_MAX;
+         weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      } else {
+         weights[0] = PIPE_VIDEO_MV_WEIGHT_MIN;
+         weights[1] = PIPE_VIDEO_MV_WEIGHT_MIN;
+      }
+      break;
+   }
+}
+
+static inline struct pipe_motionvector
+MotionVectorToPipe(const XvMCMacroBlock *xvmc_mb, unsigned vector,
+                   unsigned field_select_mask, unsigned weight)
+{
+   struct pipe_motionvector mv = {{0}}; /* zeroed so unhandled motion types return a defined value */
+
+   assert(xvmc_mb);
+
+   switch (xvmc_mb->motion_type) {
    case XVMC_PREDICTION_FRAME:
-      if (xvmc_picture_structure == XVMC_FRAME_PICTURE)
-         return PIPE_MPEG12_MOTION_TYPE_FRAME;
-      else
-         return PIPE_MPEG12_MOTION_TYPE_16x8;
+      mv.top.x = xvmc_mb->PMV[0][vector][0];
+      mv.top.y = xvmc_mb->PMV[0][vector][1];
+      mv.top.field_select = PIPE_VIDEO_FRAME;
+      mv.top.weight = weight;
+
+      mv.bottom.x = xvmc_mb->PMV[0][vector][0];
+      mv.bottom.y = xvmc_mb->PMV[0][vector][1];
+      mv.bottom.weight = weight;
+      mv.bottom.field_select = PIPE_VIDEO_FRAME;
       break;
 
    case XVMC_PREDICTION_FIELD:
-      return PIPE_MPEG12_MOTION_TYPE_FIELD;
+      mv.top.x = xvmc_mb->PMV[0][vector][0];
+      mv.top.y = xvmc_mb->PMV[0][vector][1];
+      mv.top.field_select = (xvmc_mb->motion_vertical_field_select & field_select_mask) ?
+         PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
+      mv.top.weight = weight;
+
+      mv.bottom.x = xvmc_mb->PMV[1][vector][0];
+      mv.bottom.y = xvmc_mb->PMV[1][vector][1];
+      mv.bottom.field_select = (xvmc_mb->motion_vertical_field_select & (field_select_mask << 2)) ?
+         PIPE_VIDEO_BOTTOM_FIELD : PIPE_VIDEO_TOP_FIELD;
+      mv.bottom.weight = weight;
+      break;
 
-   case XVMC_PREDICTION_DUAL_PRIME:
-      return PIPE_MPEG12_MOTION_TYPE_DUALPRIME;
+   default: // TODO: Support DUALPRIME and 16x8; until then mv stays all-zero (weight 0)
+      break;
    }
 
-   XVMC_MSG(XVMC_ERR, "[XvMC] Unrecognized motion type 0x%08X (with picture structure 0x%08X).\n", xvmc_motion_type, xvmc_picture_structure);
-
-   return -1;
+   return mv;
 }
 
 static void
-MacroBlocksToPipe(struct pipe_screen *screen,
+MacroBlocksToPipe(XvMCSurfacePrivate *surface,
                   unsigned int xvmc_picture_structure,
                   const XvMCMacroBlock *xvmc_mb,
                   const XvMCBlockArray *xvmc_blocks,
@@ -98,62 +152,32 @@ MacroBlocksToPipe(struct pipe_screen *screen,
    assert(num_macroblocks);
 
    for (i = 0; i < num_macroblocks; ++i) {
+      unsigned mv_pos = xvmc_mb->x + surface->mv_stride * xvmc_mb->y;
+      unsigned mv_weights[2];
+
       mb->base.codec = PIPE_VIDEO_CODEC_MPEG12;
       mb->mbx = xvmc_mb->x;
       mb->mby = xvmc_mb->y;
 
-      if (!(xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA))
-         mb->mo_type = MotionToPipe(xvmc_mb->motion_type, xvmc_picture_structure);
-      /* Get rid of Valgrind 'undefined' warnings */
-      else
-         mb->mo_type = -1;
-
       mb->dct_intra = xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA;
       mb->dct_type = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ?
          PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
+      mb->cbp = xvmc_mb->coded_block_pattern;
+      mb->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
 
-      switch (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD)) {
-      case XVMC_MB_TYPE_MOTION_FORWARD:
-         mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 255;
-         mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 0;
-         break;
-
-      case (XVMC_MB_TYPE_MOTION_FORWARD | XVMC_MB_TYPE_MOTION_BACKWARD):
-         mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 127;
-         mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 127;
-         break;
-
-      case XVMC_MB_TYPE_MOTION_BACKWARD:
-         mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 0;
-         mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 255;
-         break;
-
-      default:
-         /* workaround for xines xxmc video out plugin */
-         if (!(xvmc_mb->macroblock_type & ~XVMC_MB_TYPE_PATTERN)) {
-            mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 255;
-            mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 0;
-         } else {
-            mb->mv[0].top.wheight = mb->mv[0].bottom.wheight = 0;
-            mb->mv[1].top.wheight = mb->mv[1].bottom.wheight = 0;
-         }
-         break;
-      }
+      MacroBlockTypeToPipeWeights(xvmc_mb, mv_weights);
 
       for (j = 0; j < 2; ++j) {
-         mb->mv[j].top.x = xvmc_mb->PMV[0][j][0];
-         mb->mv[j].top.y = xvmc_mb->PMV[0][j][1];
-         mb->mv[j].bottom.x = xvmc_mb->PMV[1][j][0];
-         mb->mv[j].bottom.y = xvmc_mb->PMV[1][j][1];
-      }
+         if (!surface->ref[j].mv) continue;
 
-      mb->mv[0].top.field_select = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_FIRST_FORWARD;
-      mb->mv[1].top.field_select = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_FIRST_BACKWARD;
-      mb->mv[0].bottom.field_select = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_SECOND_FORWARD;
-      mb->mv[1].bottom.field_select = xvmc_mb->motion_vertical_field_select & XVMC_SELECT_SECOND_BACKWARD;
+         surface->ref[j].mv[mv_pos] = MotionVectorToPipe
+         (
+            xvmc_mb, j,
+            j ? XVMC_SELECT_FIRST_BACKWARD : XVMC_SELECT_FIRST_FORWARD,
+            mv_weights[j]
+         );
 
-      mb->cbp = xvmc_mb->coded_block_pattern;
-      mb->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
+      }
 
       ++mb;
       ++xvmc_mb;
@@ -172,13 +196,13 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
    context_priv = surface->context->privData;
 
    for ( i = 0; i < 2; ++i ) {
-      if (surface->ref_surfaces[i]) {
-         XvMCSurfacePrivate *ref = surface->ref_surfaces[i]->privData;
+      if (surface->ref[i].surface) {
+         XvMCSurfacePrivate *ref = surface->ref[i].surface->privData;
 
          assert(ref);
 
          unmap_and_flush_surface(ref);
-         surface->ref_surfaces[i] = NULL;
+         surface->ref[i].surface = NULL;
          ref_frames[i] = ref->video_buffer;
       } else {
          ref_frames[i] = NULL;
@@ -225,6 +249,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
       return BadAlloc;
 
    surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder);
+   surface_priv->mv_stride = surface_priv->decode_buffer->get_mv_stream_stride(surface_priv->decode_buffer);
    surface_priv->video_buffer = vpipe->create_buffer(vpipe, PIPE_FORMAT_YV12, //TODO
                                                      resource_formats,
                                                      context_priv->decoder->chroma_format,
@@ -262,6 +287,8 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    XvMCSurfacePrivate *future_surface_priv;
    XvMCMacroBlock *xvmc_mb;
 
+   unsigned i;
+
    struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p, with past %p and future %p\n",
@@ -319,23 +346,30 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
 
    /* If the surface we're rendering hasn't changed the ref frames shouldn't change. */
    if (target_surface_priv->mapped && (
-       target_surface_priv->ref_surfaces[0] != past_surface ||
-       target_surface_priv->ref_surfaces[1] != future_surface ||
+       target_surface_priv->ref[0].surface != past_surface ||
+       target_surface_priv->ref[1].surface != future_surface ||
        (xvmc_mb->x == 0 && xvmc_mb->y == 0))) {
 
       // If they change anyway we need to clear our surface
       unmap_and_flush_surface(target_surface_priv);
    }
 
-   MacroBlocksToPipe(vpipe->screen, picture_structure, xvmc_mb, blocks, num_macroblocks, pipe_macroblocks);
-
    if (!target_surface_priv->mapped) {
       t_buffer->map(t_buffer);
-      target_surface_priv->ref_surfaces[0] = past_surface;
-      target_surface_priv->ref_surfaces[1] = future_surface;
+
+      for (i = 0; i < 2; ++i) {
+         target_surface_priv->ref[i].surface = i == 0 ? past_surface : future_surface;
+
+         if (target_surface_priv->ref[i].surface)
+            target_surface_priv->ref[i].mv = t_buffer->get_mv_stream(t_buffer, i);
+         else
+            target_surface_priv->ref[i].mv = NULL;
+      }
       target_surface_priv->mapped = 1;
    }
 
+   MacroBlocksToPipe(target_surface_priv, picture_structure, xvmc_mb, blocks, num_macroblocks, pipe_macroblocks);
+
    t_buffer->add_macroblocks(t_buffer, num_macroblocks, &pipe_macroblocks->base);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface);
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 056bdfc2f3c..a85d58a519c 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -71,7 +71,12 @@ typedef struct
 
    bool mapped; // are we still mapped to memory?
 
-   XvMCSurface *ref_surfaces[2];
+   unsigned mv_stride;
+
+   struct {
+      XvMCSurface *surface;
+      struct pipe_motionvector *mv;
+   } ref[2];
 
    struct pipe_fence_handle *flush_fence;
    struct pipe_fence_handle *render_fence;
-- 
cgit v1.2.3


From f0819a22f3dc63d1c0dde6320babf9b7fcda15bb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 23 Apr 2011 03:37:05 +0200
Subject: [g3dvl] start implementing zscan and quantization

Not 100% complete, but at least a good start.
---
 src/gallium/auxiliary/Makefile               |   1 +
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 332 ++++++++++++------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |  10 +
 src/gallium/auxiliary/vl/vl_vertex_buffers.c |   9 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.h |   6 +-
 src/gallium/auxiliary/vl/vl_zscan.c          | 492 +++++++++++++++++++++++++++
 src/gallium/auxiliary/vl/vl_zscan.h          | 110 ++++++
 7 files changed, 849 insertions(+), 111 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_zscan.c
 create mode 100644 src/gallium/auxiliary/vl/vl_zscan.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index d210a25510b..aa1b255c0c9 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -152,6 +152,7 @@ C_SOURCES = \
 	vl/vl_mpeg12_decoder.c \
 	vl/vl_compositor.c \
 	vl/vl_csc.c \
+	vl/vl_zscan.c \
         vl/vl_idct.c \
 	vl/vl_mc.c \
         vl/vl_vertex_buffers.c \
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index f262c13e0fc..3b1d26d3db8 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -44,6 +44,14 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {
    { { 0x01, 0x01 },  { 0x01, 0x01 } }
 };
 
+static const enum pipe_format const_zscan_source_formats[] = {
+   PIPE_FORMAT_R16_SNORM,
+   PIPE_FORMAT_R16_SSCALED
+};
+
+static const unsigned num_zscan_source_formats =
+   sizeof(const_zscan_source_formats) / sizeof(enum pipe_format);
+
 static const enum pipe_format const_idct_source_formats[] = {
    PIPE_FORMAT_R16G16B16A16_SNORM,
    PIPE_FORMAT_R16G16B16A16_SSCALED
@@ -79,10 +87,8 @@ map_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
 
    assert(ctx && buffer);
 
-   if (ctx->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      sampler_views = buffer->idct_source->get_sampler_views(buffer->idct_source);
-   else
-      sampler_views = buffer->mc_source->get_sampler_views(buffer->mc_source);
+   sampler_views = buffer->zscan_source->get_sampler_views(buffer->zscan_source);
+
    assert(sampler_views);
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
@@ -112,21 +118,17 @@ upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane,
              unsigned x, unsigned y, short *block,
              bool intra, enum pipe_mpeg12_dct_type type)
 {
-   unsigned tex_pitch;
    short *texels;
-
-   unsigned i;
+   unsigned idx;
 
    assert(buffer);
    assert(block);
 
-   vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type);
+   idx = vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type);
 
-   tex_pitch = buffer->tex_transfer[plane]->stride / sizeof(short);
-   texels = buffer->texels[plane] + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
+   texels = buffer->texels[plane] + idx * BLOCK_WIDTH * BLOCK_HEIGHT;
 
-   for (i = 0; i < BLOCK_HEIGHT; ++i)
-      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * sizeof(short));
+   memcpy(texels, block, BLOCK_WIDTH * BLOCK_HEIGHT * sizeof(short));
 }
 
 static void
@@ -178,6 +180,144 @@ unmap_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
    }
 }
 
+static bool
+init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
+{
+   enum pipe_format formats[3];
+
+   struct pipe_sampler_view **source;
+   struct pipe_surface **destination;
+
+   struct vl_mpeg12_decoder *dec;
+
+   unsigned i;
+
+   assert(buffer);
+
+   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+
+   formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
+   buffer->zscan_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                               dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
+                                               dec->max_blocks / dec->blocks_per_line,
+                                               1, PIPE_VIDEO_CHROMA_FORMAT_444,
+                                               formats, PIPE_USAGE_STATIC);
+   if (!buffer->zscan_source)
+      goto error_source;
+
+   source = buffer->zscan_source->get_sampler_views(buffer->zscan_source);
+   if (!source)
+      goto error_sampler;
+
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      destination = buffer->idct_source->get_surfaces(buffer->idct_source);
+   else
+      destination = buffer->mc_source->get_surfaces(buffer->mc_source);
+
+   if (!destination)
+      goto error_surface;
+
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c,
+                                &buffer->zscan[i], source[i], destination[i]))
+         goto error_plane;
+
+   return true;
+
+error_plane:
+   for (; i > 0; --i)
+      vl_zscan_cleanup_buffer(&buffer->zscan[i - 1]);
+
+error_surface:
+error_sampler:
+   buffer->zscan_source->destroy(buffer->zscan_source);
+
+error_source:
+   return false;
+}
+
+static void
+cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer)
+{
+   unsigned i;
+
+   assert(buffer);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      vl_zscan_cleanup_buffer(&buffer->zscan[i]);
+   buffer->zscan_source->destroy(buffer->zscan_source);
+}
+
+static bool
+init_idct_buffer(struct vl_mpeg12_buffer *buffer)
+{
+   enum pipe_format formats[3];
+
+   struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv;
+   struct pipe_surface **idct_surfaces;
+
+   struct vl_mpeg12_decoder *dec;
+
+   unsigned i;
+
+   assert(buffer);
+
+   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
+
+   formats[0] = formats[1] = formats[2] = dec->idct_source_format;
+   buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                              dec->base.width / 4, dec->base.height, 1,
+                                              dec->base.chroma_format,
+                                              formats, PIPE_USAGE_STATIC);
+   if (!buffer->idct_source)
+      goto error_source;
+
+   formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format;
+   buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                                    dec->base.width / dec->nr_of_idct_render_targets,
+                                                    dec->base.height / 4, dec->nr_of_idct_render_targets,
+                                                    dec->base.chroma_format,
+                                                    formats, PIPE_USAGE_STATIC);
+
+   if (!buffer->idct_intermediate)
+      goto error_intermediate;
+
+   idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source);
+   if (!idct_source_sv)
+      goto error_source_sv;
+
+   idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate);
+   if (!idct_intermediate_sv)
+      goto error_intermediate_sv;
+
+   idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
+   if (!idct_surfaces)
+      goto error_surfaces;
+
+   for (i = 0; i < 3; ++i)
+      if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
+                               &buffer->idct[i], idct_source_sv[i],
+                               idct_intermediate_sv[i], idct_surfaces[i]))
+         goto error_plane;
+
+   return true;
+
+error_plane:
+   for (; i > 0; --i)
+      vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
+
+error_surfaces:
+error_intermediate_sv:
+error_source_sv:
+   buffer->idct_intermediate->destroy(buffer->idct_intermediate);
+
+error_intermediate:
+   buffer->idct_source->destroy(buffer->idct_source);
+
+error_source:
+   return false;
+}
+
 static void
 cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
 {
@@ -187,11 +327,11 @@ cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
    assert(dec);
 
-   buf->idct_source->destroy(buf->idct_source);
-   buf->idct_intermediate->destroy(buf->idct_intermediate);
    vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
+   buf->idct_source->destroy(buf->idct_source);
+   buf->idct_intermediate->destroy(buf->idct_intermediate);
 }
 
 static void
@@ -206,6 +346,8 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
    assert(dec);
 
+   cleanup_zscan_buffer(buf);
+
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
       cleanup_idct_buffer(buf);
 
@@ -310,6 +452,9 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
       vl_idct_cleanup(&dec->idct_c);
    }
 
+   vl_zscan_cleanup(&dec->zscan_y);
+   vl_zscan_cleanup(&dec->zscan_c);
+
    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
    dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv);
 
@@ -319,76 +464,6 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    FREE(dec);
 }
 
-static bool
-init_idct_buffer(struct vl_mpeg12_buffer *buffer)
-{
-   enum pipe_format formats[3];
-
-   struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv;
-   struct pipe_surface **idct_surfaces;
-
-   struct vl_mpeg12_decoder *dec;
-
-   unsigned i;
-
-   assert(buffer);
-
-   dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
-
-   formats[0] = formats[1] = formats[2] = dec->idct_source_format;
-   buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                              dec->base.width / 4, dec->base.height, 1,
-                                              dec->base.chroma_format,
-                                              formats, PIPE_USAGE_STREAM);
-   if (!buffer->idct_source)
-      goto error_source;
-
-   formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format;
-   buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                                    dec->base.width / dec->nr_of_idct_render_targets,
-                                                    dec->base.height / 4, dec->nr_of_idct_render_targets,
-                                                    dec->base.chroma_format,
-                                                    formats, PIPE_USAGE_STATIC);
-
-   if (!buffer->idct_intermediate)
-      goto error_intermediate;
-
-   idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source);
-   if (!idct_source_sv)
-      goto error_source_sv;
-
-   idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate);
-   if (!idct_intermediate_sv)
-      goto error_intermediate_sv;
-
-   idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
-   if (!idct_surfaces)
-      goto error_surfaces;
-
-   for (i = 0; i < 3; ++i)
-      if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
-                               &buffer->idct[i], idct_source_sv[i],
-                               idct_intermediate_sv[i], idct_surfaces[i]))
-         goto error_plane;
-
-   return true;
-
-error_plane:
-   for (; i > 0; --i)
-      vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
-
-error_surfaces:
-error_intermediate_sv:
-error_source_sv:
-   buffer->idct_intermediate->destroy(buffer->idct_intermediate);
-
-error_intermediate:
-   buffer->idct_source->destroy(buffer->idct_source);
-
-error_source:
-   return false;
-}
-
 static struct pipe_video_decode_buffer *
 vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
 {
@@ -426,10 +501,6 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    if (!buffer->mc_source)
       goto error_mc_source;
 
-   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      if (!init_idct_buffer(buffer))
-         goto error_idct;
-
    mc_source_sv = buffer->mc_source->get_sampler_views(buffer->mc_source);
    if (!mc_source_sv)
       goto error_mc_source_sv;
@@ -443,8 +514,18 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    if(!vl_mc_init_buffer(&dec->mc_c, &buffer->mc[2], mc_source_sv[2]))
       goto error_mc_cr;
 
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+      if (!init_idct_buffer(buffer))
+         goto error_idct;
+
+   if (!init_zscan_buffer(buffer))
+      goto error_zscan;
+
    return &buffer->base;
 
+error_zscan:
+   // TODO: clean up error handling; this label releases nothing itself and just falls through to error_mc_cr
+
 error_mc_cr:
    vl_mc_cleanup_buffer(&buffer->mc[1]);
 
@@ -517,6 +598,8 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
       dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
 
+      vl_zscan_render(&buf->zscan[i] , num_instances);
+
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
          vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_instances);
 
@@ -590,9 +673,47 @@ find_first_supported_format(struct vl_mpeg12_decoder *dec,
 }
 
 static bool
-init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_height)
+init_zscan(struct vl_mpeg12_decoder *dec)
+{
+   struct pipe_sampler_view *layout;
+
+   unsigned num_channels;
+
+   assert(dec);
+
+   dec->blocks_per_line = 4;
+   dec->max_blocks =
+      (dec->base.width * dec->base.height) /
+      (BLOCK_WIDTH * BLOCK_HEIGHT);
+
+   dec->zscan_source_format = find_first_supported_format(dec, const_zscan_source_formats,
+                                                          num_zscan_source_formats, PIPE_TEXTURE_2D);
+
+   if (dec->zscan_source_format == PIPE_FORMAT_NONE)
+      return false;
+
+   layout = vl_zscan_linear(dec->pipe, dec->blocks_per_line);
+
+   num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1;
+
+   if (!vl_zscan_init(&dec->zscan_y, dec->pipe, dec->base.width, dec->base.height,
+                      dec->blocks_per_line, dec->max_blocks, num_channels))
+      return false;
+
+   vl_zscan_set_layout(&dec->zscan_y, layout);
+
+   if (!vl_zscan_init(&dec->zscan_c, dec->pipe, dec->chroma_width, dec->chroma_height,
+                      dec->blocks_per_line, dec->max_blocks, num_channels))
+      return false;
+
+   vl_zscan_set_layout(&dec->zscan_c, layout);
+
+   return true;
+}
+
+static bool
+init_idct(struct vl_mpeg12_decoder *dec)
 {
-   unsigned chroma_width, chroma_height;
    struct pipe_sampler_view *matrix, *transpose;
    float matrix_scale, transpose_scale;
 
@@ -645,22 +766,11 @@ init_idct(struct vl_mpeg12_decoder *dec, unsigned buffer_width, unsigned buffer_
    } else
       pipe_sampler_view_reference(&transpose, matrix);
 
-   if (!vl_idct_init(&dec->idct_y, dec->pipe, buffer_width, buffer_height,
+   if (!vl_idct_init(&dec->idct_y, dec->pipe, dec->base.width, dec->base.height,
                      dec->nr_of_idct_render_targets, matrix, transpose))
       goto error_y;
 
-   if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      chroma_width = buffer_width / 2;
-      chroma_height = buffer_height / 2;
-   } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
-      chroma_width = buffer_width;
-      chroma_height = buffer_height / 2;
-   } else {
-      chroma_width = buffer_width;
-      chroma_height = buffer_height;
-   }
-
-   if(!vl_idct_init(&dec->idct_c, dec->pipe, chroma_width, chroma_height,
+   if(!vl_idct_init(&dec->idct_c, dec->pipe, dec->chroma_width, dec->chroma_height,
                     dec->nr_of_idct_render_targets, matrix, transpose))
       goto error_c;
 
@@ -736,8 +846,22 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    if (dec->mc_source_format == PIPE_FORMAT_NONE)
       return NULL;
 
+   if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
+      dec->chroma_width = dec->base.width / 2;
+      dec->chroma_height = dec->base.height / 2;
+   } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) {
+      dec->chroma_width = dec->base.width;
+      dec->chroma_height = dec->base.height / 2;
+   } else {
+      dec->chroma_width = dec->base.width;
+      dec->chroma_height = dec->base.height;
+   }
+
+   if (!init_zscan(dec))
+      return NULL; // TODO error handling
+
    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
-      if (!init_idct(dec, dec->base.width, dec->base.height))
+      if (!init_idct(dec))
          goto error_idct;
       if (dec->mc_source_format == PIPE_FORMAT_R16_SSCALED)
          mc_scale = SCALE_FACTOR_SSCALED;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index c961e433b51..b94f12a9b7a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -30,6 +30,7 @@
 
 #include <pipe/p_video_context.h>
 
+#include "vl_zscan.h"
 #include "vl_idct.h"
 #include "vl_mc.h"
 
@@ -44,9 +45,15 @@ struct vl_mpeg12_decoder
    struct pipe_video_decoder base;
    struct pipe_context *pipe;
 
+   unsigned chroma_width, chroma_height;
+
+   unsigned blocks_per_line;
+   unsigned max_blocks;
+
    const unsigned (*empty_block_mask)[3][2][2];
    unsigned nr_of_idct_render_targets;
 
+   enum pipe_format zscan_source_format;
    enum pipe_format idct_source_format;
    enum pipe_format idct_intermediate_format;
    enum pipe_format mc_source_format;
@@ -57,6 +64,7 @@ struct vl_mpeg12_decoder
    void *ves_ycbcr;
    void *ves_mv;
 
+   struct vl_zscan zscan_y, zscan_c;
    struct vl_idct idct_y, idct_c;
    struct vl_mc mc_y, mc_c;
 
@@ -69,10 +77,12 @@ struct vl_mpeg12_buffer
 
    struct vl_vertex_buffer vertex_stream;
 
+   struct pipe_video_buffer *zscan_source;
    struct pipe_video_buffer *idct_source;
    struct pipe_video_buffer *idct_intermediate;
    struct pipe_video_buffer *mc_source;
 
+   struct vl_zscan_buffer zscan[VL_MAX_PLANES];
    struct vl_idct_buffer idct[VL_MAX_PLANES];
    struct vl_mc_buffer mc[VL_MAX_PLANES];
 
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index e61425843ff..d2025f76b86 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -301,9 +301,10 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 
 }
 
-void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
-                     unsigned component, unsigned x, unsigned y,
-                     bool intra, enum pipe_mpeg12_dct_type type)
+unsigned
+vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
+                unsigned component, unsigned x, unsigned y,
+                bool intra, enum pipe_mpeg12_dct_type type)
 {
    struct vl_ycbcr_vertex_stream *stream;
 
@@ -316,7 +317,7 @@ void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
    stream->intra = intra;
    stream->field = type == PIPE_MPEG12_DCT_TYPE_FIELD;
 
-   buffer->ycbcr[component].num_instances++;
+   return buffer->ycbcr[component].num_instances++;
 }
 
 unsigned
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 6a83111b4a8..89d455225ad 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -84,9 +84,9 @@ void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
 struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
 
-void vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
-                     unsigned component, unsigned x, unsigned y,
-                     bool intra, enum pipe_mpeg12_dct_type type);
+unsigned vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
+                         unsigned component, unsigned x, unsigned y,
+                         bool intra, enum pipe_mpeg12_dct_type type);
 
 struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame);
 
diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
new file mode 100644
index 00000000000..4d4d3fd6d95
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -0,0 +1,492 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <assert.h>
+
+#include <pipe/p_screen.h>
+#include <pipe/p_context.h>
+
+#include <util/u_draw.h>
+#include <util/u_sampler.h>
+#include <util/u_inlines.h>
+
+#include <tgsi/tgsi_ureg.h>
+
+#include <vl/vl_defines.h>
+#include <vl/vl_types.h>
+
+#include "vl_zscan.h"
+#include "vl_vertex_buffers.h"
+
+enum VS_OUTPUT /* semantic indices used by both shaders built in this file */
+{
+   VS_O_VPOS, /* index passed along with TGSI_SEMANTIC_POSITION */
+   VS_O_VTEX  /* first generic output; channel i uses VS_O_VTEX + i */
+};
+
+static void *
+create_vert_shader(struct vl_zscan *zscan)
+{
+   struct ureg_program *shader;
+
+   struct ureg_src scale, instance;
+   struct ureg_src vrect, vpos;
+
+   struct ureg_dst tmp;
+   struct ureg_dst o_vpos, o_vtex[zscan->num_channels];
+
+   unsigned i;
+   /* Emits the zscan vertex shader; returns NULL if ureg_create() fails. */
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / zscan->buffer_width,
+      (float)BLOCK_HEIGHT / zscan->buffer_height);
+
+   instance = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0);
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   tmp = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      o_vtex[i] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i);
+
+   /*
+    * o_vpos.xy = (vpos + vrect) * scale
+    * o_vpos.zw = 1.0f
+    *
+    * tmp.xz = InstanceID / blocks_per_line
+    * tmp.x = frac(tmp.x)
+    * tmp.z = floor(tmp.z)
+    * tmp.x += 1 / (blocks_per_line * BLOCK_WIDTH), before every channel but the first
+    * o_vtex[i].x = vrect.x / blocks_per_line + tmp.x
+    * o_vtex[i].y = vrect.y
+    * o_vtex[i].z = tmp.z * blocks_per_line / blocks_total
+    */
+   ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XY), vpos, vrect);
+   ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), instance,
+            ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
+
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp));
+   ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp));
+
+   for (i = 0; i < zscan->num_channels; ++i) {
+      if (i > 0)
+         ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp),
+                  ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH)));
+
+      ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
+               ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
+      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
+      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp),
+               ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
+   }
+
+   ureg_release_temporary(shader, tmp);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, zscan->pipe);
+}
+
+static void *
+create_frag_shader(struct vl_zscan *zscan)
+{
+   struct ureg_program *shader;
+   struct ureg_src vtex[zscan->num_channels];
+
+   struct ureg_src src, scan, quant;
+
+   struct ureg_dst tmp[zscan->num_channels];
+   struct ureg_dst fragment;
+
+   unsigned i;
+   /* Emits the zscan fragment shader; returns NULL if ureg_create() fails. */
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);
+
+   src = ureg_DECL_sampler(shader, 0);
+   scan = ureg_DECL_sampler(shader, 1);
+   quant = ureg_DECL_sampler(shader, 2);
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      tmp[i] = ureg_DECL_temporary(shader);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * tmp[i].x = tex(vtex[i], scan), tmp[i].y = vtex[i].z
+    * tmp[i] = tex(tmp[i], src)
+    * fragment component i = tmp[i] * 1.0 (quant is declared but not sampled yet)
+    */
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan);
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z));
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src);
+
+   // TODO: Fetch quant and use it
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f));
+
+   for (i = 0; i < zscan->num_channels; ++i)
+      ureg_release_temporary(shader, tmp[i]);
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, zscan->pipe);
+}
+
+static bool
+init_shaders(struct vl_zscan *zscan)
+{
+   /*
+    * Builds the vertex shader first, then the fragment shader.
+    * Returns true only when both were created.
+    */
+   assert(zscan);
+
+   zscan->vs = create_vert_shader(zscan);
+   if (zscan->vs == NULL)
+      return false;
+
+   zscan->fs = create_frag_shader(zscan);
+   if (zscan->fs != NULL)
+      return true;
+
+   /* no fragment shader: delete the vertex shader again before bailing out */
+   zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
+   return false;
+}
+
+static void
+cleanup_shaders(struct vl_zscan *zscan)
+{
+   assert(zscan);
+   /* deletes zscan->vs and zscan->fs unconditionally; both are set by init_shaders() */
+   zscan->pipe->delete_vs_state(zscan->pipe, zscan->vs);
+   zscan->pipe->delete_fs_state(zscan->pipe, zscan->fs);
+}
+
+static bool
+init_state(struct vl_zscan *zscan)
+{
+   struct pipe_blend_state blend;
+   struct pipe_rasterizer_state rs_state;
+   struct pipe_sampler_state sampler;
+   unsigned i;
+
+   assert(zscan);
+   /* Creates the rasterizer, blend and three sampler objects; false on failure. */
+   memset(&rs_state, 0, sizeof(rs_state));
+   rs_state.gl_rasterization_rules = false;
+   zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state);
+   if (!zscan->rs_state)
+      goto error_rs_state;
+
+   memset(&blend, 0, sizeof blend);
+   /* blending and logic ops are left disabled (blend_enable / logicop_enable = 0) */
+   blend.independent_blend_enable = 0;
+   blend.rt[0].blend_enable = 0;
+   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+   blend.logicop_enable = 0;
+   blend.logicop_func = PIPE_LOGICOP_CLEAR;
+   /* Needed to allow color writes to FB, even if blending disabled */
+   blend.rt[0].colormask = PIPE_MASK_RGBA;
+   blend.dither = 0;
+   zscan->blend = zscan->pipe->create_blend_state(zscan->pipe, &blend);
+   if (!zscan->blend)
+      goto error_blend;
+
+   for (i = 0; i < 3; ++i) {
+      memset(&sampler, 0, sizeof(sampler));
+      sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
+      sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+      sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+      sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+      sampler.compare_func = PIPE_FUNC_ALWAYS;
+      sampler.normalized_coords = 1;
+      zscan->samplers[i] = zscan->pipe->create_sampler_state(zscan->pipe, &sampler);
+      if (!zscan->samplers[i])
+         goto error_samplers;
+   }
+
+   return true;
+
+error_samplers:
+   /* i is the sampler that failed to create: release only entries 0 .. i-1 */
+   while (i-- > 0)
+      zscan->pipe->delete_sampler_state(zscan->pipe, zscan->samplers[i]);
+
+   zscan->pipe->delete_blend_state(zscan->pipe, zscan->blend);
+
+error_blend:
+   zscan->pipe->delete_rasterizer_state(zscan->pipe, zscan->rs_state);
+
+error_rs_state:
+   return false;
+}
+
+static void
+cleanup_state(struct vl_zscan *zscan)
+{
+   struct pipe_context *ctx;
+   unsigned idx = 0;
+
+   assert(zscan);
+   ctx = zscan->pipe;
+   while (idx < 3) /* the three samplers go first, then rasterizer and blend */
+      ctx->delete_sampler_state(ctx, zscan->samplers[idx++]);
+   ctx->delete_rasterizer_state(ctx, zscan->rs_state);
+   ctx->delete_blend_state(ctx, zscan->blend);
+}
+
+struct pipe_sampler_view *
+vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line)
+{
+   const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT;
+   /* Identity scan layout: each texel stores its own index / total_size. NULL on error. */
+   struct pipe_resource res_tmpl, *res;
+   struct pipe_sampler_view sv_tmpl, *sv;
+   struct pipe_transfer *buf_transfer;
+   unsigned x, y, i, pitch;
+   float *f;
+
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      BLOCK_WIDTH * blocks_per_line,
+      BLOCK_HEIGHT,
+      1
+   };
+
+   assert(pipe && blocks_per_line);
+
+   memset(&res_tmpl, 0, sizeof(res_tmpl));
+   res_tmpl.target = PIPE_TEXTURE_2D;
+   res_tmpl.format = PIPE_FORMAT_R32_FLOAT;
+   res_tmpl.width0 = BLOCK_WIDTH * blocks_per_line;
+   res_tmpl.height0 = BLOCK_HEIGHT;
+   res_tmpl.depth0 = 1;
+   res_tmpl.array_size = 1;
+   res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
+   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
+
+   res = pipe->screen->resource_create(pipe->screen, &res_tmpl);
+   if (!res)
+      goto error_resource;
+
+   buf_transfer = pipe->get_transfer
+   (
+      pipe, res,
+      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+   if (!buf_transfer)
+      goto error_transfer;
+
+   pitch = buf_transfer->stride / sizeof(float);
+
+   f = pipe->transfer_map(pipe, buf_transfer);
+   if (!f)
+      goto error_map;
+
+   for (i = 0; i < blocks_per_line; ++i)
+      for (y = 0; y < BLOCK_HEIGHT; ++y)
+         for (x = 0; x < BLOCK_WIDTH; ++x) {
+            float addr = x + y * BLOCK_WIDTH +
+               i * BLOCK_WIDTH * BLOCK_HEIGHT;
+
+            addr /= total_size;
+
+            f[i * BLOCK_WIDTH + y * pitch + x] = addr;
+         }
+
+   pipe->transfer_unmap(pipe, buf_transfer);
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
+   u_sampler_view_default_template(&sv_tmpl, res, res->format);
+   sv = pipe->create_sampler_view(pipe, res, &sv_tmpl);
+   pipe_resource_reference(&res, NULL);
+   if (!sv)
+      goto error_resource; /* transfer and res were already released above */
+
+   return sv;
+
+error_map:
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+error_transfer:
+   pipe_resource_reference(&res, NULL);
+
+error_resource:
+   return NULL;
+}
+
+#if 0
+// TODO
+struct pipe_sampler_view *
+vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);
+
+struct pipe_sampler_view *
+vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
+#endif
+
+bool
+vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
+              unsigned buffer_width, unsigned buffer_height,
+              unsigned blocks_per_line, unsigned blocks_total,
+              unsigned num_channels)
+{
+   assert(zscan && pipe);
+   /* store the parameters first: the shader builders read them from zscan */
+   zscan->pipe = pipe;
+   zscan->blocks_total = blocks_total;
+   zscan->blocks_per_line = blocks_per_line;
+   zscan->num_channels = num_channels;
+   zscan->buffer_height = buffer_height;
+   zscan->buffer_width = buffer_width;
+
+   if (!init_shaders(zscan))
+      return false;
+
+   if (init_state(zscan))
+      return true;
+
+   /* state objects failed: delete the shaders created above */
+   cleanup_shaders(zscan);
+   return false;
+}
+
+void
+vl_zscan_cleanup(struct vl_zscan *zscan)
+{
+   assert(zscan);
+   /* NOTE(review): zscan->scan (taken in vl_zscan_set_layout) is not released here - confirm */
+   cleanup_shaders(zscan);
+   cleanup_state(zscan);
+}
+
+void
+vl_zscan_set_layout(struct vl_zscan *zscan, struct pipe_sampler_view *layout)
+{
+   assert(zscan && layout);
+
+   /* zscan->scan now references layout; vl_zscan_init_buffer() copies it per buffer */
+   pipe_sampler_view_reference(&zscan->scan, layout);
+}
+
+#if 0
+// TODO
+void
+vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
+#endif
+
+bool
+vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
+                     struct pipe_sampler_view *src, struct pipe_surface *dst)
+{
+   struct pipe_viewport_state *vp;
+
+   assert(zscan && buffer);
+
+   /* clear everything, including the pointers handed to the reference helpers */
+   memset(buffer, 0, sizeof(*buffer));
+   buffer->zscan = zscan;
+   vp = &buffer->viewport;
+
+   pipe_sampler_view_reference(&buffer->src, src);
+   pipe_sampler_view_reference(&buffer->scan, zscan->scan);
+   pipe_sampler_view_reference(&buffer->quant, zscan->quant);
+
+   /* viewport scale is the destination size; translation stays zero */
+   vp->scale[0] = dst->width;
+   vp->scale[1] = dst->height;
+   vp->scale[2] = vp->scale[3] = 1;
+   vp->translate[0] = vp->translate[1] = vp->translate[2] = vp->translate[3] = 0;
+
+   buffer->fb_state.width = dst->width;
+   buffer->fb_state.height = dst->height;
+   buffer->fb_state.nr_cbufs = 1;
+   pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);
+
+   return true;
+}
+
+void
+vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer)
+{
+   assert(buffer);
+   /* resets every pointer vl_zscan_init_buffer() filled through the reference helpers */
+   pipe_sampler_view_reference(&buffer->src, NULL);
+   pipe_sampler_view_reference(&buffer->scan, NULL);
+   pipe_sampler_view_reference(&buffer->quant, NULL);
+   pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL);
+}
+
+void
+vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
+{
+   struct pipe_context *ctx;
+   struct vl_zscan *zs;
+
+   assert(buffer);
+   zs = buffer->zscan;
+   ctx = zs->pipe;
+   ctx->bind_rasterizer_state(ctx, zs->rs_state);
+   ctx->bind_blend_state(ctx, zs->blend);
+   ctx->bind_fragment_sampler_states(ctx, 2, zs->samplers); /* first 2 of 3 samplers */
+   ctx->set_framebuffer_state(ctx, &buffer->fb_state);
+   ctx->set_viewport_state(ctx, &buffer->viewport);
+   ctx->set_fragment_sampler_views(ctx, 2, &buffer->src); /* src, then the adjacent scan */
+   ctx->bind_vs_state(ctx, zs->vs);
+   ctx->bind_fs_state(ctx, zs->fs);
+   util_draw_arrays_instanced(ctx, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
+}
diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h
new file mode 100644
index 00000000000..28b990ca83b
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_zscan.h
@@ -0,0 +1,110 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_zscan_h
+#define vl_zscan_h
+
+#include <pipe/p_compiler.h>
+#include <pipe/p_state.h>
+
+/*
+ * shader based zscan and quantization
+ * expects vl_vertex_buffers to be used as the todo list
+ */
+struct vl_zscan
+{
+   struct pipe_context *pipe; /* context used for every create/bind/delete call */
+
+   unsigned buffer_width;  /* divisor of BLOCK_WIDTH in the vertex shader scale */
+   unsigned buffer_height; /* divisor of BLOCK_HEIGHT in the vertex shader scale */
+
+   unsigned num_channels; /* number of texcoord outputs / fragment components */
+
+   unsigned blocks_per_line; /* divides InstanceID in the vertex shader */
+   unsigned blocks_total;    /* normalizes the line index (o_vtex.z) */
+
+   void *rs_state; /* rasterizer state object from init_state() */
+   void *blend;    /* blend state object from init_state() */
+
+   void *samplers[3]; /* three are created; vl_zscan_render() binds the first two */
+
+   void *vs, *fs; /* shaders from init_shaders() */
+
+   struct pipe_sampler_view *scan;  /* set by vl_zscan_set_layout() */
+   struct pipe_sampler_view *quant; /* not assigned anywhere in vl_zscan.c yet */
+};
+
+struct vl_zscan_buffer
+{
+   struct vl_zscan *zscan; /* owner passed to vl_zscan_init_buffer() */
+
+   struct pipe_viewport_state viewport;    /* scaled to the dst surface size */
+   struct pipe_framebuffer_state fb_state; /* cbufs[0] references the dst surface */
+
+   struct pipe_sampler_view *src, *scan, *quant; /* &src is passed as a 2-entry view array */
+   struct pipe_surface *dst; /* left NULL by vl_zscan_init_buffer() (only memset) */
+};
+
+struct pipe_sampler_view *
+vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line);
+
+#if 0
+struct pipe_sampler_view *
+vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);
+
+struct pipe_sampler_view *
+vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
+#endif
+
+bool
+vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
+              unsigned buffer_width, unsigned buffer_height,
+              unsigned blocks_per_line, unsigned blocks_total,
+              unsigned num_channels);
+
+void
+vl_zscan_set_layout(struct vl_zscan *zscan, struct pipe_sampler_view *layout);
+
+void
+vl_zscan_cleanup(struct vl_zscan *zscan);
+
+#if 0
+void
+vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
+#endif
+
+bool
+vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
+                     struct pipe_sampler_view *src, struct pipe_surface *dst);
+
+void
+vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer);
+
+void
+vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances);
+
+#endif
-- 
cgit v1.2.3


From 2e6274fc3b123e7de695038054b5cbd20b11559a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 23 Apr 2011 13:24:35 +0200
Subject: [g3dvl] make ycbcr stream and block data a public interface

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c       | 210 ++++++---------------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h       |   2 -
 src/gallium/auxiliary/vl/vl_vertex_buffers.c       |  43 +----
 src/gallium/auxiliary/vl/vl_vertex_buffers.h       |  13 +-
 src/gallium/include/pipe/p_video_context.h         |  24 ++-
 src/gallium/include/pipe/p_video_state.h           |  29 ++-
 .../state_trackers/vdpau/mpeg2_bitstream_parser.c  |   2 +
 .../state_trackers/vdpau/mpeg2_bitstream_parser.h  |   2 +
 src/gallium/state_trackers/xorg/xvmc/surface.c     | 105 +++++++++--
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   7 +-
 10 files changed, 189 insertions(+), 248 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 3b1d26d3db8..dcef6207afd 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -38,12 +38,6 @@
 #define SCALE_FACTOR_SNORM (32768.0f / 256.0f)
 #define SCALE_FACTOR_SSCALED (1.0f / 256.0f)
 
-static const unsigned const_empty_block_mask_420[3][2][2] = {
-   { { 0x20, 0x10 },  { 0x08, 0x04 } },
-   { { 0x02, 0x02 },  { 0x02, 0x02 } },
-   { { 0x01, 0x01 },  { 0x01, 0x01 } }
-};
-
 static const enum pipe_format const_zscan_source_formats[] = {
    PIPE_FORMAT_R16_SNORM,
    PIPE_FORMAT_R16_SSCALED
@@ -78,108 +72,6 @@ static const enum pipe_format const_mc_source_formats[] = {
 static const unsigned num_mc_source_formats =
    sizeof(const_mc_source_formats) / sizeof(enum pipe_format);
 
-static void
-map_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
-{
-   struct pipe_sampler_view **sampler_views;
-   struct pipe_resource *tex;
-   unsigned i;
-
-   assert(ctx && buffer);
-
-   sampler_views = buffer->zscan_source->get_sampler_views(buffer->zscan_source);
-
-   assert(sampler_views);
-
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
-      tex = sampler_views[i]->texture;
-
-      struct pipe_box rect =
-      {
-         0, 0, 0,
-         tex->width0,
-         tex->height0,
-         1
-      };
-
-      buffer->tex_transfer[i] = ctx->pipe->get_transfer
-      (
-         ctx->pipe, tex,
-         0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
-         &rect
-      );
-
-      buffer->texels[i] = ctx->pipe->transfer_map(ctx->pipe, buffer->tex_transfer[i]);
-   }
-}
-
-static void
-upload_block(struct vl_mpeg12_buffer *buffer, unsigned plane,
-             unsigned x, unsigned y, short *block,
-             bool intra, enum pipe_mpeg12_dct_type type)
-{
-   short *texels;
-   unsigned idx;
-
-   assert(buffer);
-   assert(block);
-
-   idx = vl_vb_add_ycbcr(&buffer->vertex_stream, plane, x, y, intra, type);
-
-   texels = buffer->texels[plane] + idx * BLOCK_WIDTH * BLOCK_HEIGHT;
-
-   memcpy(texels, block, BLOCK_WIDTH * BLOCK_HEIGHT * sizeof(short));
-}
-
-static void
-upload_buffer(struct vl_mpeg12_decoder *ctx,
-              struct vl_mpeg12_buffer *buffer,
-              struct pipe_mpeg12_macroblock *mb)
-{
-   short *blocks;
-   unsigned tb, x, y;
-
-   assert(ctx);
-   assert(buffer);
-   assert(mb);
-
-   blocks = mb->blocks;
-
-   for (y = 0; y < 2; ++y) {
-      for (x = 0; x < 2; ++x, ++tb) {
-         if (mb->cbp & (*ctx->empty_block_mask)[0][y][x]) {
-            upload_block(buffer, 0, mb->mbx * 2 + x, mb->mby * 2 + y, blocks,
-                         mb->dct_intra, mb->dct_type);
-            blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
-         }
-      }
-   }
-
-   /* TODO: Implement 422, 444 */
-   assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-
-   for (tb = 1; tb < 3; ++tb) {
-      if (mb->cbp & (*ctx->empty_block_mask)[tb][0][0]) {
-         upload_block(buffer, tb, mb->mbx, mb->mby, blocks,
-                      mb->dct_intra, mb->dct_type);
-         blocks += BLOCK_WIDTH * BLOCK_HEIGHT;
-      }
-   }
-}
-
-static void
-unmap_buffers(struct vl_mpeg12_decoder *ctx, struct vl_mpeg12_buffer *buffer)
-{
-   unsigned i;
-
-   assert(ctx && buffer);
-
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
-      ctx->pipe->transfer_unmap(ctx->pipe, buffer->tex_transfer[i]);
-      ctx->pipe->transfer_destroy(ctx->pipe, buffer->tex_transfer[i]);
-   }
-}
-
 static bool
 init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
 {
@@ -364,57 +256,81 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
    struct vl_mpeg12_decoder *dec;
+
+   struct pipe_sampler_view **sampler_views;
+   unsigned i;
+
    assert(buf);
 
    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
    assert(dec);
 
    vl_vb_map(&buf->vertex_stream, dec->pipe);
-   map_buffers(dec, buf);
+
+   sampler_views = buf->zscan_source->get_sampler_views(buf->zscan_source);
+
+   assert(sampler_views);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      struct pipe_resource *tex = sampler_views[i]->texture;
+      struct pipe_box rect =
+      {
+         0, 0, 0,
+         tex->width0,
+         tex->height0,
+         1
+      };
+
+      buf->tex_transfer[i] = dec->pipe->get_transfer
+      (
+         dec->pipe, tex,
+         0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+         &rect
+      );
+
+      buf->texels[i] = dec->pipe->transfer_map(dec->pipe, buf->tex_transfer[i]);
+   }
 }
 
-static unsigned
-vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer)
+static struct pipe_ycbcr_block *
+vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 
    assert(buf);
 
-   return vl_vb_get_mv_stream_stride(&buf->vertex_stream);
+   return vl_vb_get_ycbcr_stream(&buf->vertex_stream, component);
 }
 
-static struct pipe_motionvector *
-vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame)
+static short *
+vl_mpeg12_buffer_get_ycbcr_buffer(struct pipe_video_decode_buffer *buffer, int component)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 
    assert(buf);
+   assert(component < VL_MAX_PLANES);
 
-   return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame);
+   return buf->texels[component];
 }
 
-static void
-vl_mpeg12_buffer_add_macroblocks(struct pipe_video_decode_buffer *buffer,
-                                 unsigned num_macroblocks,
-                                 struct pipe_macroblock *macroblocks)
+static unsigned
+vl_mpeg12_buffer_get_mv_stream_stride(struct pipe_video_decode_buffer *buffer)
 {
-   struct pipe_mpeg12_macroblock *mb = (struct pipe_mpeg12_macroblock*)macroblocks;
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   struct vl_mpeg12_decoder *dec;
-   unsigned i;
 
    assert(buf);
 
-   dec =  (struct vl_mpeg12_decoder*)buf->base.decoder;
-   assert(dec);
+   return vl_vb_get_mv_stream_stride(&buf->vertex_stream);
+}
 
-   assert(num_macroblocks);
-   assert(macroblocks);
-   assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
+static struct pipe_motionvector *
+vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_frame)
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
 
-   for ( i = 0; i < num_macroblocks; ++i ) {
-      upload_buffer(dec, buf, &mb[i]);
-   }
+   assert(buf);
+
+   return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame);
 }
 
 static void
@@ -422,13 +338,19 @@ vl_mpeg12_buffer_unmap(struct pipe_video_decode_buffer *buffer)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
    struct vl_mpeg12_decoder *dec;
+   unsigned i;
+
    assert(buf);
 
    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
    assert(dec);
 
    vl_vb_unmap(&buf->vertex_stream, dec->pipe);
-   unmap_buffers(dec, buf);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
+      dec->pipe->transfer_unmap(dec->pipe, buf->tex_transfer[i]);
+      dec->pipe->transfer_destroy(dec->pipe, buf->tex_transfer[i]);
+   }
 }
 
 static void
@@ -483,9 +405,10 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    buffer->base.decoder = decoder;
    buffer->base.destroy = vl_mpeg12_buffer_destroy;
    buffer->base.map = vl_mpeg12_buffer_map;
+   buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream;
+   buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer;
    buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
    buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
-   buffer->base.add_macroblocks = vl_mpeg12_buffer_add_macroblocks;
    buffer->base.unmap = vl_mpeg12_buffer_unmap;
 
    vl_vb_init(&buffer->vertex_stream, dec->pipe,
@@ -550,6 +473,7 @@ error_vertex_stream:
 
 static void
 vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
+                               unsigned num_ycbcr_blocks[3],
                                struct pipe_video_buffer *refs[2],
                                struct pipe_video_buffer *dst,
                                struct pipe_fence_handle **fence)
@@ -593,34 +517,22 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
    dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      unsigned num_instances = vl_vb_restart(&buf->vertex_stream, i);
+      if (num_ycbcr_blocks[i] == 0) continue;
 
       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
       dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
 
-      vl_zscan_render(&buf->zscan[i] , num_instances);
+      vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]);
 
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-         vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_instances);
+         vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_ycbcr_blocks[i]);
 
-      vl_mc_render_ycbcr(&buf->mc[i], num_instances);
+      vl_mc_render_ycbcr(&buf->mc[i], num_ycbcr_blocks[i]);
    }
 
    dec->pipe->flush(dec->pipe, fence);
 }
 
-static void
-vl_mpeg12_decoder_clear_buffer(struct pipe_video_decode_buffer *buffer)
-{
-   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
-   unsigned i;
-
-   assert(buf);
-
-   for (i = 0; i < VL_MAX_PLANES; ++i)
-      vl_vb_restart(&buf->vertex_stream, i);
-}
-
 static bool
 init_pipe_state(struct vl_mpeg12_decoder *dec)
 {
@@ -819,7 +731,6 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    dec->base.destroy = vl_mpeg12_destroy;
    dec->base.create_buffer = vl_mpeg12_create_buffer;
    dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
-   dec->base.clear_buffer = vl_mpeg12_decoder_clear_buffer;
 
    dec->base.width = align(width, MACROBLOCK_WIDTH);
    dec->base.height = align(height, MACROBLOCK_HEIGHT);
@@ -838,7 +749,6 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
 
    /* TODO: Implement 422, 444 */
    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
-   dec->empty_block_mask = &const_empty_block_mask_420;
 
    dec->mc_source_format = find_first_supported_format(dec, const_mc_source_formats,
                                                        num_mc_source_formats, PIPE_TEXTURE_3D);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index b94f12a9b7a..9d5768816fb 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -49,8 +49,6 @@ struct vl_mpeg12_decoder
 
    unsigned blocks_per_line;
    unsigned max_blocks;
-
-   const unsigned (*empty_block_mask)[3][2][2];
    unsigned nr_of_idct_render_targets;
 
    enum pipe_format zscan_source_format;
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index d2025f76b86..d2e03988acf 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -30,14 +30,6 @@
 #include "vl_vertex_buffers.h"
 #include "vl_types.h"
 
-struct vl_ycbcr_vertex_stream
-{
-   uint8_t x;
-   uint8_t y;
-   uint8_t intra;
-   uint8_t field;
-};
-
 /* vertices for a quad covering a block */
 static const struct vertex2f block_quad[4] = {
    {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
@@ -221,13 +213,12 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
    size = width * height;
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      buffer->ycbcr[i].num_instances = 0;
       buffer->ycbcr[i].resource = pipe_buffer_create
       (
          pipe->screen,
          PIPE_BIND_VERTEX_BUFFER,
          PIPE_USAGE_STREAM,
-         sizeof(struct vl_ycbcr_vertex_stream) * size * 4
+         sizeof(struct pipe_ycbcr_block) * size * 4
       );
    }
 
@@ -251,7 +242,7 @@ vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component)
 
    assert(buffer);
 
-   buf.stride = sizeof(struct vl_ycbcr_vertex_stream);
+   buf.stride = sizeof(struct pipe_ycbcr_block);
    buf.buffer_offset = 0;
    buf.buffer = buffer->ycbcr[component].resource;
 
@@ -301,23 +292,13 @@ vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
 
 }
 
-unsigned
-vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
-                unsigned component, unsigned x, unsigned y,
-                bool intra, enum pipe_mpeg12_dct_type type)
+struct pipe_ycbcr_block *
+vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component)
 {
-   struct vl_ycbcr_vertex_stream *stream;
-
    assert(buffer);
-   assert(buffer->ycbcr[component].num_instances < buffer->width * buffer->height * 4);
-
-   stream = buffer->ycbcr[component].vertex_stream++;
-   stream->x = x;
-   stream->y = y;
-   stream->intra = intra;
-   stream->field = type == PIPE_MPEG12_DCT_TYPE_FIELD;
+   assert(component < VL_MAX_PLANES);
 
-   return buffer->ycbcr[component].num_instances++;
+   return buffer->ycbcr[component].vertex_stream;
 }
 
 unsigned
@@ -353,18 +334,6 @@ vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe)
    }
 }
 
-unsigned
-vl_vb_restart(struct vl_vertex_buffer *buffer, int component)
-{
-   unsigned num_instances;
-
-   assert(buffer);
-
-   num_instances = buffer->ycbcr[component].num_instances;
-   buffer->ycbcr[component].num_instances = 0;
-   return num_instances;
-}
-
 void
 vl_vb_cleanup(struct vl_vertex_buffer *buffer)
 {
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 89d455225ad..5293820ffca 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -55,10 +55,9 @@ struct vl_vertex_buffer
    unsigned width, height;
 
    struct {
-      unsigned                      num_instances;
-      struct pipe_resource          *resource;
-      struct pipe_transfer          *transfer;
-      struct vl_ycbcr_vertex_stream *vertex_stream;
+      struct pipe_resource    *resource;
+      struct pipe_transfer    *transfer;
+      struct pipe_ycbcr_block *vertex_stream;
    } ycbcr[VL_MAX_PLANES];
 
    struct {
@@ -84,9 +83,7 @@ void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
 struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
 
-unsigned vl_vb_add_ycbcr(struct vl_vertex_buffer *buffer,
-                         unsigned component, unsigned x, unsigned y,
-                         bool intra, enum pipe_mpeg12_dct_type type);
+struct pipe_ycbcr_block *vl_vb_get_ycbcr_stream(struct vl_vertex_buffer *buffer, int component);
 
 struct pipe_vertex_buffer vl_vb_get_mv(struct vl_vertex_buffer *buffer, int ref_frame);
 
@@ -96,8 +93,6 @@ struct pipe_motionvector *vl_vb_get_mv_stream(struct vl_vertex_buffer *buffer, i
 
 void vl_vb_unmap(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
-unsigned vl_vb_restart(struct vl_vertex_buffer *buffer, int component);
-
 void vl_vb_cleanup(struct vl_vertex_buffer *buffer);
 
 #endif /* vl_vertex_buffers_h */
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 1eb96420fb7..8775bbb2705 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -158,15 +158,10 @@ struct pipe_video_decoder
     * flush decoder buffer to video hardware
     */
    void (*flush_buffer)(struct pipe_video_decode_buffer *decbuf,
+                        unsigned num_ycbcr_blocks[3],
                         struct pipe_video_buffer *ref_frames[2],
                         struct pipe_video_buffer *dst,
                         struct pipe_fence_handle **fence);
-
-   /**
-    * clear decoder buffers todo list
-    */
-   void (*clear_buffer)(struct pipe_video_decode_buffer *decbuf);
-
 };
 
 /**
@@ -186,6 +181,16 @@ struct pipe_video_decode_buffer
     */
    void (*map)(struct pipe_video_decode_buffer *decbuf);
 
+   /**
+    * get the pointer where to put the ycbcr blocks of a component
+    */
+   struct pipe_ycbcr_block *(*get_ycbcr_stream)(struct pipe_video_decode_buffer *, int component);
+
+   /**
+    * get the pointer where to put the ycbcr dct block data of a component
+    */
+   short *(*get_ycbcr_buffer)(struct pipe_video_decode_buffer *, int component);
+
    /**
     * get the stride of the mv buffer
     */
@@ -205,13 +210,6 @@ struct pipe_video_decode_buffer
                             struct pipe_buffer **bitstream_buf);
 #endif
 
-   /**
-    * add macroblocks to decoder buffer
-    */
-   void (*add_macroblocks)(struct pipe_video_decode_buffer *decbuf,
-                           unsigned num_macroblocks,
-                           struct pipe_macroblock *macroblocks);
-
    /**
     * unmap decoder buffer before flushing
     */
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index dcb64d3c220..8bd84c2846a 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -50,10 +50,16 @@ enum pipe_mpeg12_picture_type
    PIPE_MPEG12_PICTURE_TYPE_FRAME
 };
 
+enum pipe_mpeg12_dct_intra
+{
+   PIPE_MPEG12_DCT_DELTA = 0,
+   PIPE_MPEG12_DCT_INTRA = 1
+};
+
 enum pipe_mpeg12_dct_type
 {
-   PIPE_MPEG12_DCT_TYPE_FIELD,
-   PIPE_MPEG12_DCT_TYPE_FRAME
+   PIPE_MPEG12_DCT_TYPE_FRAME = 0,
+   PIPE_MPEG12_DCT_TYPE_FIELD = 1
 };
 
 enum pipe_video_field_select
@@ -75,11 +81,6 @@ enum pipe_video_mv_weight
    PIPE_VIDEO_MV_WEIGHT_MAX = 256
 };
 
-struct pipe_macroblock
-{
-   enum pipe_video_codec codec;
-};
-
 /* bitfields because this is used as a vertex buffer element */
 struct pipe_motionvector
 {
@@ -90,16 +91,12 @@ struct pipe_motionvector
    } top, bottom;
 };
 
-struct pipe_mpeg12_macroblock
+/* bitfields because this is used as a vertex buffer element */
+struct pipe_ycbcr_block
 {
-   struct pipe_macroblock base;
-
-   unsigned mbx;
-   unsigned mby;
-   bool dct_intra;
-   enum pipe_mpeg12_dct_type dct_type;
-   unsigned cbp;
-   short *blocks;
+   unsigned x:8, y:8;
+   enum pipe_mpeg12_dct_intra intra:8;
+   enum pipe_mpeg12_dct_type coding:8;
 };
 
 #if 0
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
index 7b285079a19..182f3d44c45 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
@@ -28,6 +28,7 @@
 #include <stdlib.h>
 #include "mpeg2_bitstream_parser.h"
 
+#if 0
 int
 vlVdpMPEG2NextStartCode(struct vdpMPEG2BitstreamParser *parser)
 {
@@ -130,3 +131,4 @@ vlVdpMPEG2BitstreamToMacroblock(struct pipe_screen *screen,
    return 0;
 }
 
+#endif
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
index 1fa425fdcdb..2f8a14996c5 100644
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
+++ b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
@@ -53,11 +53,13 @@ struct vdpMPEG2BitstreamParser
    uint32_t horizontal_size_value;
 };
 
+#if 0
 int
 vlVdpMPEG2BitstreamToMacroblock(struct pipe_screen *screen,
                                 VdpBitstreamBuffer const *bitstream_buffers,
                                 uint32_t bitstream_buffer_count,
                                 unsigned int *num_macroblocks,
                                 struct pipe_mpeg12_macroblock **pipe_macroblocks);
+#endif
 
 #endif // MPEG2_BITSTREAM_PARSER_H
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 567484e993d..f8a0f3c7fd9 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -42,6 +42,12 @@
 
 #include "xvmc_private.h"
 
+static const unsigned const_empty_block_mask_420[3][2][2] = {
+   { { 0x20, 0x10 },  { 0x08, 0x04 } },
+   { { 0x02, 0x02 },  { 0x02, 0x02 } },
+   { { 0x01, 0x01 },  { 0x01, 0x01 } }
+};
+
 static enum pipe_mpeg12_picture_type PictureToPipe(int xvmc_pic)
 {
    switch (xvmc_pic) {
@@ -136,34 +142,89 @@ MotionVectorToPipe(const XvMCMacroBlock *xvmc_mb, unsigned vector,
    return mv;
 }
 
+static inline void
+UploadYcbcrBlocks(XvMCSurfacePrivate *surface,
+                  const XvMCMacroBlock *xvmc_mb,
+                  const XvMCBlockArray *xvmc_blocks)
+{
+   enum pipe_mpeg12_dct_intra intra;
+   enum pipe_mpeg12_dct_type coding;
+
+   unsigned tb, x, y;
+   short *blocks;
+
+   assert(surface);
+   assert(xvmc_mb);
+
+   intra = xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA ?
+           PIPE_MPEG12_DCT_INTRA : PIPE_MPEG12_DCT_DELTA;
+
+   coding = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ?
+            PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
+
+   blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
+
+   for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x, ++tb) {
+         if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) {
+
+            struct pipe_ycbcr_block *stream = surface->ycbcr[0].stream;
+            stream->x = xvmc_mb->x * 2 + x;
+            stream->y = xvmc_mb->y * 2 + y;
+            stream->intra = intra;
+            stream->coding = coding;
+
+            memcpy(surface->ycbcr[0].buffer, blocks, BLOCK_SIZE_BYTES);
+
+            surface->ycbcr[0].num_blocks_added++;
+            surface->ycbcr[0].stream++;
+            surface->ycbcr[0].buffer += BLOCK_SIZE_SAMPLES;
+            blocks += BLOCK_SIZE_SAMPLES;
+         }
+      }
+   }
+
+   /* TODO: Implement 422, 444 */
+   //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
+
+   for (tb = 1; tb < 3; ++tb) {
+      if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[tb][0][0]) {
+
+         struct pipe_ycbcr_block *stream = surface->ycbcr[tb].stream;
+         stream->x = xvmc_mb->x;
+         stream->y = xvmc_mb->y;
+         stream->intra = intra;
+         stream->coding = PIPE_MPEG12_DCT_TYPE_FRAME;
+
+         memcpy(surface->ycbcr[tb].buffer, blocks, BLOCK_SIZE_BYTES);
+
+         surface->ycbcr[tb].num_blocks_added++;
+         surface->ycbcr[tb].stream++;
+         surface->ycbcr[tb].buffer += BLOCK_SIZE_SAMPLES;
+         blocks += BLOCK_SIZE_SAMPLES;
+      }
+   }
+
+}
+
 static void
 MacroBlocksToPipe(XvMCSurfacePrivate *surface,
                   unsigned int xvmc_picture_structure,
                   const XvMCMacroBlock *xvmc_mb,
                   const XvMCBlockArray *xvmc_blocks,
-                  unsigned int num_macroblocks,
-                  struct pipe_mpeg12_macroblock *mb)
+                  unsigned int num_macroblocks)
 {
    unsigned int i, j;
 
    assert(xvmc_mb);
    assert(xvmc_blocks);
-   assert(mb);
    assert(num_macroblocks);
 
    for (i = 0; i < num_macroblocks; ++i) {
       unsigned mv_pos = xvmc_mb->x + surface->mv_stride * xvmc_mb->y;
       unsigned mv_weights[2];
 
-      mb->base.codec = PIPE_VIDEO_CODEC_MPEG12;
-      mb->mbx = xvmc_mb->x;
-      mb->mby = xvmc_mb->y;
-
-      mb->dct_intra = xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA;
-      mb->dct_type = xvmc_mb->dct_type == XVMC_DCT_TYPE_FIELD ?
-         PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
-      mb->cbp = xvmc_mb->coded_block_pattern;
-      mb->blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
+      UploadYcbcrBlocks(surface, xvmc_mb, xvmc_blocks);
 
       MacroBlockTypeToPipeWeights(xvmc_mb, mv_weights);
 
@@ -176,10 +237,8 @@ MacroBlocksToPipe(XvMCSurfacePrivate *surface,
             j ? XVMC_SELECT_FIRST_BACKWARD : XVMC_SELECT_FIRST_FORWARD,
             mv_weights[j]
          );
-
       }
 
-      ++mb;
       ++xvmc_mb;
    }
 }
@@ -189,7 +248,7 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
 {
    struct pipe_video_buffer *ref_frames[2];
    XvMCContextPrivate *context_priv;
-   unsigned i;
+   unsigned i, num_ycbcr_blocks[3];
 
    assert(surface);
 
@@ -211,7 +270,10 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
 
    if (surface->mapped) {
       surface->decode_buffer->unmap(surface->decode_buffer);
+      for (i = 0; i < 3; ++i)
+         num_ycbcr_blocks[i] = surface->ycbcr[i].num_blocks_added;
       context_priv->decoder->flush_buffer(surface->decode_buffer,
+                                          num_ycbcr_blocks,
                                           ref_frames,
                                           surface->video_buffer,
                                           &surface->flush_fence);
@@ -289,8 +351,6 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
 
    unsigned i;
 
-   struct pipe_mpeg12_macroblock pipe_macroblocks[num_macroblocks];
-
    XVMC_MSG(XVMC_TRACE, "[XvMC] Rendering to surface %p, with past %p and future %p\n",
             target_surface, past_surface, future_surface);
 
@@ -357,6 +417,12 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    if (!target_surface_priv->mapped) {
       t_buffer->map(t_buffer);
 
+      for (i = 0; i < 3; ++i) {
+         target_surface_priv->ycbcr[i].num_blocks_added = 0;
+         target_surface_priv->ycbcr[i].stream = t_buffer->get_ycbcr_stream(t_buffer, i);
+         target_surface_priv->ycbcr[i].buffer = t_buffer->get_ycbcr_buffer(t_buffer, i);
+      }
+
       for (i = 0; i < 2; ++i) {
          target_surface_priv->ref[i].surface = i == 0 ? past_surface : future_surface;
 
@@ -365,12 +431,11 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
          else
             target_surface_priv->ref[i].mv = NULL;
       }
+
       target_surface_priv->mapped = 1;
    }
 
-   MacroBlocksToPipe(target_surface_priv, picture_structure, xvmc_mb, blocks, num_macroblocks, pipe_macroblocks);
-
-   t_buffer->add_macroblocks(t_buffer, num_macroblocks, &pipe_macroblocks->base);
+   MacroBlocksToPipe(target_surface_priv, picture_structure, xvmc_mb, blocks, num_macroblocks);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for rendering.\n", target_surface);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index a85d58a519c..bdcda3bb56f 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -71,8 +71,13 @@ typedef struct
 
    bool mapped; // are we still mapped to memory?
 
-   unsigned mv_stride;
+   struct {
+      unsigned num_blocks_added;
+      struct pipe_ycbcr_block *stream;
+      short *buffer;
+   } ycbcr[3];
 
+   unsigned mv_stride;
    struct {
       XvMCSurface *surface;
       struct pipe_motionvector *mv;
-- 
cgit v1.2.3


From 24d76d2966a5c666c9627034e6751621b17024c8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 23 Apr 2011 13:58:08 +0200
Subject: [g3dvl] cleanup error handling

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 125 +++++++++++++++++----------
 src/gallium/auxiliary/vl/vl_vertex_buffers.c |  16 +++-
 src/gallium/auxiliary/vl/vl_vertex_buffers.h |   2 +-
 3 files changed, 95 insertions(+), 48 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index dcef6207afd..16b204df676 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -226,12 +226,74 @@ cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
    buf->idct_intermediate->destroy(buf->idct_intermediate);
 }
 
+static bool
+init_mc_buffer(struct vl_mpeg12_buffer *buf)
+{
+   struct vl_mpeg12_decoder *dec;
+   enum pipe_format formats[3];
+   struct pipe_sampler_view **mc_source_sv;
+
+   assert(buf);
+
+   dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
+   assert(dec);
+
+   formats[0] = formats[1] = formats[2] =dec->mc_source_format;
+   buf->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                            dec->base.width, dec->base.height, 1,
+                                            dec->base.chroma_format,
+                                            formats, PIPE_USAGE_STATIC);
+
+   if (!buf->mc_source)
+      goto error_mc_source;
+
+   mc_source_sv = buf->mc_source->get_sampler_views(buf->mc_source);
+   if (!mc_source_sv)
+      goto error_mc_source_sv;
+
+   if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0], mc_source_sv[0]))
+      goto error_mc_y;
+
+   if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[1], mc_source_sv[1]))
+      goto error_mc_cb;
+
+   if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[2], mc_source_sv[2]))
+      goto error_mc_cr;
+
+   return true;
+
+error_mc_cr:
+   vl_mc_cleanup_buffer(&buf->mc[1]);
+
+error_mc_cb:
+   vl_mc_cleanup_buffer(&buf->mc[0]);
+
+error_mc_y:
+error_mc_source_sv:
+   buf->mc_source->destroy(buf->mc_source);
+
+error_mc_source:
+   return false;
+}
+
+static void
+cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
+{
+   unsigned i;
+
+   assert(buf);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      vl_mc_cleanup_buffer(&buf->mc[i]);
+
+   buf->mc_source->destroy(buf->mc_source);
+}
+
 static void
 vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
    struct vl_mpeg12_decoder *dec;
-   unsigned i;
 
    assert(buf);
 
@@ -243,10 +305,9 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
       cleanup_idct_buffer(buf);
 
-   buf->mc_source->destroy(buf->mc_source);
+   cleanup_mc_buffer(buf);
+
    vl_vb_cleanup(&buf->vertex_stream);
-   for (i = 0; i < VL_MAX_PLANES; ++i)
-      vl_mc_cleanup_buffer(&buf->mc[i]);
 
    FREE(buf);
 }
@@ -389,13 +450,9 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 static struct pipe_video_decode_buffer *
 vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
 {
-   enum pipe_format formats[3];
-
    struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder*)decoder;
    struct vl_mpeg12_buffer *buffer;
 
-   struct pipe_sampler_view **mc_source_sv;
-
    assert(dec);
 
    buffer = CALLOC_STRUCT(vl_mpeg12_buffer);
@@ -411,31 +468,13 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
    buffer->base.unmap = vl_mpeg12_buffer_unmap;
 
-   vl_vb_init(&buffer->vertex_stream, dec->pipe,
-              dec->base.width / MACROBLOCK_WIDTH,
-              dec->base.height / MACROBLOCK_HEIGHT);
+   if (!vl_vb_init(&buffer->vertex_stream, dec->pipe,
+                   dec->base.width / MACROBLOCK_WIDTH,
+                   dec->base.height / MACROBLOCK_HEIGHT))
+      goto error_vertex_buffer;
 
-   formats[0] = formats[1] = formats[2] =dec->mc_source_format;
-   buffer->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                            dec->base.width, dec->base.height, 1,
-                                            dec->base.chroma_format,
-                                            formats, PIPE_USAGE_STATIC);
-
-   if (!buffer->mc_source)
-      goto error_mc_source;
-
-   mc_source_sv = buffer->mc_source->get_sampler_views(buffer->mc_source);
-   if (!mc_source_sv)
-      goto error_mc_source_sv;
-
-   if(!vl_mc_init_buffer(&dec->mc_y, &buffer->mc[0], mc_source_sv[0]))
-      goto error_mc_y;
-
-   if(!vl_mc_init_buffer(&dec->mc_c, &buffer->mc[1], mc_source_sv[1]))
-      goto error_mc_cb;
-
-   if(!vl_mc_init_buffer(&dec->mc_c, &buffer->mc[2], mc_source_sv[2]))
-      goto error_mc_cr;
+   if (!init_mc_buffer(buffer))
+      goto error_mc;
 
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
       if (!init_idct_buffer(buffer))
@@ -447,26 +486,16 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    return &buffer->base;
 
 error_zscan:
-   // TODO Cleanup error handling
-
-error_mc_cr:
-   vl_mc_cleanup_buffer(&buffer->mc[1]);
-
-error_mc_cb:
-   vl_mc_cleanup_buffer(&buffer->mc[0]);
-
-error_mc_y:
-error_mc_source_sv:
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
       cleanup_idct_buffer(buffer);
 
 error_idct:
-   buffer->mc_source->destroy(buffer->mc_source);
+   cleanup_mc_buffer(buffer);
 
-error_mc_source:
+error_mc:
    vl_vb_cleanup(&buffer->vertex_stream);
 
-error_vertex_stream:
+error_vertex_buffer:
    FREE(buffer);
    return NULL;
 }
@@ -768,7 +797,7 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    }
 
    if (!init_zscan(dec))
-      return NULL; // TODO error handling
+      goto error_zscan;
 
    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
       if (!init_idct(dec))
@@ -818,6 +847,10 @@ error_mc_y:
    }
 
 error_idct:
+   vl_zscan_cleanup(&dec->zscan_y);
+   vl_zscan_cleanup(&dec->zscan_c);
+
+error_zscan:
    FREE(dec);
    return NULL;
 }
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index d2e03988acf..c45f96cec51 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -199,7 +199,7 @@ vl_vb_get_ves_mv(struct pipe_context *pipe)
    return pipe->create_vertex_elements_state(pipe, NUM_VS_INPUTS, vertex_elems);
 }
 
-void
+bool
 vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
            unsigned width, unsigned height)
 {
@@ -220,6 +220,8 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
          PIPE_USAGE_STREAM,
          sizeof(struct pipe_ycbcr_block) * size * 4
       );
+      if (!buffer->ycbcr[i].resource)
+         goto error_ycbcr;
    }
 
    for (i = 0; i < VL_MAX_REF_FRAMES; ++i) {
@@ -230,9 +232,21 @@ vl_vb_init(struct vl_vertex_buffer *buffer, struct pipe_context *pipe,
          PIPE_USAGE_STREAM,
          sizeof(struct pipe_motionvector) * size
       );
+      if (!buffer->mv[i].resource)
+         goto error_mv;
    }
 
    vl_vb_map(buffer, pipe);
+   return true;
+
+error_mv:
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      pipe_resource_reference(&buffer->mv[i].resource, NULL);
+
+error_ycbcr:
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      pipe_resource_reference(&buffer->ycbcr[i].resource, NULL);
+   return false;
 }
 
 struct pipe_vertex_buffer
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 5293820ffca..7e727cbca12 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -75,7 +75,7 @@ void *vl_vb_get_ves_ycbcr(struct pipe_context *pipe);
 
 void *vl_vb_get_ves_mv(struct pipe_context *pipe);
 
-void vl_vb_init(struct vl_vertex_buffer *buffer,
+bool vl_vb_init(struct vl_vertex_buffer *buffer,
                 struct pipe_context *pipe,
                 unsigned width, unsigned height);
 
-- 
cgit v1.2.3


From 68cc6bc5d8b6986acc7f5780d705f4ae9be2a446 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 24 Apr 2011 12:47:42 +0200
Subject: r600g: remove the unneeded bo from COLOR[0-7]_INFO

The COLOR[0-7]_INFO registers don't need a bo.
---
 src/gallium/drivers/r600/r600_state.c         |  2 +-
 src/gallium/winsys/r600/drm/r600_hw_context.c | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index ac2e8986b97..da4f2706eef 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -787,7 +787,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 				(offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
 	r600_pipe_state_add_reg(rstate,
 				R_0280A0_CB_COLOR0_INFO + cb * 4,
-				color_info, 0xFFFFFFFF, bo[0]);
+				color_info, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate,
 				R_028060_CB_COLOR0_SIZE + cb * 4,
 				S_028060_PITCH_TILE_MAX(pitch) |
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 662455be28c..6be05bd90f8 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -199,7 +199,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028040_CB_COLOR0_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A0_CB_COLOR0_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A0_CB_COLOR0_INFO, 0, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028060_CB_COLOR0_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028080_CB_COLOR0_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -210,7 +210,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028044_CB_COLOR1_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A4_CB_COLOR1_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A4_CB_COLOR1_INFO, 0, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028064_CB_COLOR1_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028084_CB_COLOR1_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -221,7 +221,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028048_CB_COLOR2_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A8_CB_COLOR2_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A8_CB_COLOR2_INFO, 0, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028068_CB_COLOR2_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028088_CB_COLOR2_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -232,7 +232,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02804C_CB_COLOR3_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280AC_CB_COLOR3_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280AC_CB_COLOR3_INFO, 0, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02806C_CB_COLOR3_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02808C_CB_COLOR3_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -243,7 +243,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028050_CB_COLOR4_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B0_CB_COLOR4_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B0_CB_COLOR4_INFO, 0, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028070_CB_COLOR4_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028090_CB_COLOR4_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -254,7 +254,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028054_CB_COLOR5_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B4_CB_COLOR5_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B4_CB_COLOR5_INFO, 0, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028074_CB_COLOR5_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028094_CB_COLOR5_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -263,7 +263,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280D4_CB_COLOR5_TILE, REG_FLAG_NEED_BO, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028114_CB_COLOR5_MASK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028058_CB_COLOR6_BASE, REG_FLAG_NEED_BO, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B8_CB_COLOR6_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B8_CB_COLOR6_INFO, 0, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028078_CB_COLOR6_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028098_CB_COLOR6_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -274,7 +274,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02805C_CB_COLOR7_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280BC_CB_COLOR7_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280BC_CB_COLOR7_INFO, 0, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02807C_CB_COLOR7_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02809C_CB_COLOR7_VIEW, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280FC_CB_COLOR7_FRAG, REG_FLAG_NEED_BO, 0, 0},
-- 
cgit v1.2.3


From 4e6528428d4efa6fb2e5101bce2544d8304da3de Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 24 Apr 2011 12:57:35 +0200
Subject: r600g: implement clamp_fragment_color handling

The first small step to get arb_color_buffer_float working.
---
 src/gallium/drivers/r600/r600_state.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index da4f2706eef..8646b9f4905 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -285,7 +285,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
 {
 	struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer);
 	struct r600_pipe_state *rstate;
-	unsigned tmp;
+	unsigned tmp, cb;
 	unsigned prov_vtx = 1, polygon_dual_mode;
 	unsigned clip_rule;
 
@@ -358,6 +358,11 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
 	r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
 
+	for (cb = 0; cb < 7; ++cb)
+		r600_pipe_state_add_reg(rstate, R_0280A0_CB_COLOR0_INFO + cb * 4,
+					S_0280A0_BLEND_CLAMP(state->clamp_fragment_color),
+					S_0280A0_BLEND_CLAMP(1), NULL);
+
 	return rstate;
 }
 
@@ -719,7 +724,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 	struct r600_surface *surf;
 	unsigned level = state->cbufs[cb]->u.tex.level;
 	unsigned pitch, slice;
-	unsigned color_info;
+	unsigned color_info, color_info_mask;
 	unsigned format, swap, ntype, endian;
 	unsigned offset;
 	const struct util_format_description *desc;
@@ -772,22 +777,28 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 	color_info = S_0280A0_FORMAT(format) |
 		S_0280A0_COMP_SWAP(swap) |
 		S_0280A0_ARRAY_MODE(rtex->array_mode[level]) |
-		S_0280A0_BLEND_CLAMP(1) |
 		S_0280A0_NUMBER_TYPE(ntype) |
 		S_0280A0_ENDIAN(endian);
 
+	color_info_mask = 0xFFFFFFFF & ~S_0280A0_BLEND_CLAMP(1);
+
 	/* on R600 this can't be set if BLEND_CLAMP isn't set,
 	   if BLEND_FLOAT32 is set of > 11 bits in a UNORM or SNORM */
-	if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
-	    desc->channel[i].size < 12)
+	if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS && desc->channel[i].size < 12) {
+		//TODO: Seems to work on RV710; behaviour on families before CHIP_RV710 is unverified, so keep forcing BLEND_CLAMP there
+		if (rctx->family < CHIP_RV710) {
+			color_info |= S_0280A0_BLEND_CLAMP(1);
+			color_info_mask |= S_0280A0_BLEND_CLAMP(1);
+		}
+		color_info |= S_0280A0_SOURCE_FORMAT(V_0280A0_EXPORT_NORM);
+	}
 
 	r600_pipe_state_add_reg(rstate,
 				R_028040_CB_COLOR0_BASE + cb * 4,
 				(offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
 	r600_pipe_state_add_reg(rstate,
 				R_0280A0_CB_COLOR0_INFO + cb * 4,
-				color_info, 0xFFFFFFFF, NULL);
+				color_info, color_info_mask, NULL);
 	r600_pipe_state_add_reg(rstate,
 				R_028060_CB_COLOR0_SIZE + cb * 4,
 				S_028060_PITCH_TILE_MAX(pitch) |
-- 
cgit v1.2.3


From b54909910c04313fb45c4e8f39091ad73ec329f3 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 24 Apr 2011 16:04:23 +0200
Subject: [g3dvl] remove resource_format workaround

---
 src/gallium/auxiliary/vl/vl_context.c          | 7 ++++++-
 src/gallium/include/pipe/p_video_context.h     | 1 -
 src/gallium/state_trackers/vdpau/surface.c     | 7 -------
 src/gallium/state_trackers/xorg/xvmc/surface.c | 7 -------
 4 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index 49a9b50863c..1240b0b4c32 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -199,10 +199,15 @@ vl_context_create_decoder(struct pipe_video_context *context,
 static struct pipe_video_buffer *
 vl_context_create_buffer(struct pipe_video_context *context,
                          enum pipe_format buffer_format,
-                         const enum pipe_format resource_formats[3],
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height)
 {
+   const enum pipe_format resource_formats[3] = {
+      PIPE_FORMAT_R8_UNORM,
+      PIPE_FORMAT_R8_UNORM,
+      PIPE_FORMAT_R8_UNORM
+   };
+
    struct vl_context *ctx = (struct vl_context*)context;
    struct pipe_video_buffer *result;
    unsigned buffer_width, buffer_height;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 8775bbb2705..7e971641618 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -121,7 +121,6 @@ struct pipe_video_context
     */
    struct pipe_video_buffer *(*create_buffer)(struct pipe_video_context *context,
                                               enum pipe_format buffer_format,
-                                              const enum pipe_format resource_formats[3],
                                               enum pipe_video_chroma_format chroma_format,
                                               unsigned width, unsigned height);
 
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index 57f5563b12f..cd2125fce63 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -41,12 +41,6 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
                         uint32_t width, uint32_t height,
                         VdpVideoSurface *surface)
 {
-   const enum pipe_format resource_formats[3] = {
-      PIPE_FORMAT_R8_UNORM,
-      PIPE_FORMAT_R8_UNORM,
-      PIPE_FORMAT_R8_UNORM
-   };
-
    vlVdpSurface *p_surf;
    VdpStatus ret;
 
@@ -77,7 +71,6 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
    p_surf->device = dev;
    p_surf->video_buffer = dev->context->vpipe->create_buffer(dev->context->vpipe,
                                                              PIPE_FORMAT_YV12, // most common used
-                                                             resource_formats,
                                                              ChromaToPipe(chroma_type),
                                                              width, height);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index f8a0f3c7fd9..7429fdfcfb9 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -284,12 +284,6 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
 PUBLIC
 Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surface)
 {
-   const enum pipe_format resource_formats[3] = {
-      PIPE_FORMAT_R8_SNORM,
-      PIPE_FORMAT_R8_SNORM,
-      PIPE_FORMAT_R8_SNORM
-   };
-
    XvMCContextPrivate *context_priv;
    struct pipe_video_context *vpipe;
    XvMCSurfacePrivate *surface_priv;
@@ -313,7 +307,6 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder);
    surface_priv->mv_stride = surface_priv->decode_buffer->get_mv_stream_stride(surface_priv->decode_buffer);
    surface_priv->video_buffer = vpipe->create_buffer(vpipe, PIPE_FORMAT_YV12, //TODO
-                                                     resource_formats,
                                                      context_priv->decoder->chroma_format,
                                                      context_priv->decoder->width,
                                                      context_priv->decoder->height);
-- 
cgit v1.2.3


From 3ea7e2713c836f23d59c4034385609e371a94c8d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 24 Apr 2011 19:20:33 +0200
Subject: [g3dvl] start supporting different render target formats

Let's start with NV12, but anything else shouldn't be much of a problem any more.
---
 src/gallium/auxiliary/vl/vl_compositor.c       |  2 +-
 src/gallium/auxiliary/vl/vl_context.c          | 35 ++++++++--
 src/gallium/auxiliary/vl/vl_mc.c               | 95 ++++++++++++++------------
 src/gallium/auxiliary/vl/vl_mc.h               | 14 ++--
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   | 53 ++++++++------
 src/gallium/auxiliary/vl/vl_video_buffer.c     | 73 ++++++++++++++------
 src/gallium/auxiliary/vl/vl_video_buffer.h     |  3 +-
 src/gallium/include/pipe/p_video_context.h     |  7 +-
 src/gallium/state_trackers/vdpau/surface.c     |  2 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c |  2 +-
 10 files changed, 181 insertions(+), 105 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 25f7d5fa1da..e487abf915e 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -506,7 +506,7 @@ vl_compositor_set_buffer_layer(struct pipe_video_compositor *compositor,
    c->used_layers |= 1 << layer;
    c->layers[layer].fs = c->fs_video_buffer;
 
-   sampler_views = buffer->get_sampler_views(buffer);
+   sampler_views = buffer->get_sampler_view_components(buffer);
    for (i = 0; i < 3; ++i)
       pipe_sampler_view_reference(&c->layers[layer].sampler_views[i], sampler_views[i]);
 
diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index 1240b0b4c32..be28bb507e6 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -35,6 +35,18 @@
 #include "vl_compositor.h"
 #include "vl_mpeg12_decoder.h"
 
+const enum pipe_format const_resource_formats_YV12[3] = {
+   PIPE_FORMAT_R8_UNORM,
+   PIPE_FORMAT_R8_UNORM,
+   PIPE_FORMAT_R8_UNORM
+};
+
+const enum pipe_format const_resource_formats_NV12[3] = {
+   PIPE_FORMAT_R8_UNORM,
+   PIPE_FORMAT_R8G8_UNORM,
+   PIPE_FORMAT_NONE
+};
+
 static void
 vl_context_destroy(struct pipe_video_context *context)
 {
@@ -202,19 +214,28 @@ vl_context_create_buffer(struct pipe_video_context *context,
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height)
 {
-   const enum pipe_format resource_formats[3] = {
-      PIPE_FORMAT_R8_UNORM,
-      PIPE_FORMAT_R8_UNORM,
-      PIPE_FORMAT_R8_UNORM
-   };
-
    struct vl_context *ctx = (struct vl_context*)context;
    struct pipe_video_buffer *result;
    unsigned buffer_width, buffer_height;
 
+   const enum pipe_format *resource_formats;
+
    assert(context);
    assert(width > 0 && height > 0);
-   assert(buffer_format == PIPE_FORMAT_YV12);
+
+   switch(buffer_format) {
+   case PIPE_FORMAT_YV12:
+      resource_formats = const_resource_formats_YV12;
+      break;
+
+   case PIPE_FORMAT_NV12:
+      resource_formats = const_resource_formats_NV12;
+      break;
+
+   default:
+      assert(0);
+      return NULL;
+   }
 
    buffer_width = ctx->pot_buffers ? util_next_power_of_two(width) : width;
    buffer_height = ctx->pot_buffers ? util_next_power_of_two(height) : height;
diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index 137a1beaa0d..ecdce6b28bd 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -368,6 +368,7 @@ init_pipe_state(struct vl_mc *r)
    struct pipe_sampler_state sampler;
    struct pipe_blend_state blend;
    struct pipe_rasterizer_state rs_state;
+   unsigned i;
 
    assert(r);
 
@@ -391,28 +392,30 @@ init_pipe_state(struct vl_mc *r)
    if (!r->sampler_ycbcr)
       goto error_sampler_ycbcr;
 
-   memset(&blend, 0, sizeof blend);
-   blend.independent_blend_enable = 0;
-   blend.rt[0].blend_enable = 1;
-   blend.rt[0].rgb_func = PIPE_BLEND_ADD;
-   blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
-   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
-   blend.rt[0].alpha_func = PIPE_BLEND_ADD;
-   blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
-   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
-   blend.logicop_enable = 0;
-   blend.logicop_func = PIPE_LOGICOP_CLEAR;
-   blend.rt[0].colormask = PIPE_MASK_RGBA;
-   blend.dither = 0;
-   r->blend_clear = r->pipe->create_blend_state(r->pipe, &blend);
-   if (!r->blend_clear)
-      goto error_blend_clear;
-
-   blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
-   blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
-   r->blend_add = r->pipe->create_blend_state(r->pipe, &blend);
-   if (!r->blend_add)
-      goto error_blend_add;
+   for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) {
+      memset(&blend, 0, sizeof blend);
+      blend.independent_blend_enable = 0;
+      blend.rt[0].blend_enable = 1;
+      blend.rt[0].rgb_func = PIPE_BLEND_ADD;
+      blend.rt[0].rgb_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+      blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
+      blend.rt[0].alpha_func = PIPE_BLEND_ADD;
+      blend.rt[0].alpha_src_factor = PIPE_BLENDFACTOR_SRC_ALPHA;
+      blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
+      blend.logicop_enable = 0;
+      blend.logicop_func = PIPE_LOGICOP_CLEAR;
+      blend.rt[0].colormask = i;
+      blend.dither = 0;
+      r->blend_clear[i] = r->pipe->create_blend_state(r->pipe, &blend);
+      if (!r->blend_clear[i])
+         goto error_blend;
+
+      blend.rt[0].rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
+      blend.rt[0].alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
+      r->blend_add[i] = r->pipe->create_blend_state(r->pipe, &blend);
+      if (!r->blend_add[i])
+         goto error_blend;
+   }
 
    memset(&rs_state, 0, sizeof(rs_state));
    /*rs_state.sprite_coord_enable */
@@ -427,12 +430,15 @@ init_pipe_state(struct vl_mc *r)
    return true;
 
 error_rs_state:
-   r->pipe->delete_blend_state(r->pipe, r->blend_add);
+error_blend:
+   for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) {
+      if (r->blend_add[i])
+         r->pipe->delete_blend_state(r->pipe, r->blend_add[i]);
 
-error_blend_add:
-   r->pipe->delete_blend_state(r->pipe, r->blend_clear);
+      if (r->blend_clear[i])
+         r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]);
+   }
 
-error_blend_clear:
    r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
 
 error_sampler_ref:
@@ -445,12 +451,16 @@ error_sampler_ycbcr:
 static void
 cleanup_pipe_state(struct vl_mc *r)
 {
+   unsigned i;
+
    assert(r);
 
    r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
    r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr);
-   r->pipe->delete_blend_state(r->pipe, r->blend_clear);
-   r->pipe->delete_blend_state(r->pipe, r->blend_add);
+   for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) {
+      r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]);
+      r->pipe->delete_blend_state(r->pipe, r->blend_add[i]);
+   }
    r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
 }
 
@@ -520,11 +530,9 @@ vl_mc_cleanup(struct vl_mc *renderer)
 }
 
 bool
-vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer,
-                  struct pipe_sampler_view *source)
+vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer)
 {
    assert(renderer && buffer);
-   assert(source);
 
    buffer->renderer = renderer;
 
@@ -538,8 +546,6 @@ vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer,
    buffer->fb_state.nr_cbufs = 1;
    buffer->fb_state.zsbuf = NULL;
 
-   pipe_sampler_view_reference(&buffer->source, source);
-
    return true;
 }
 
@@ -547,8 +553,6 @@ void
 vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer)
 {
    assert(buffer);
-
-   pipe_sampler_view_reference(&buffer->source, NULL);
 }
 
 void
@@ -567,7 +571,7 @@ vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface)
 }
 
 static void
-prepare_pipe_4_rendering(struct vl_mc_buffer *buffer)
+prepare_pipe_4_rendering(struct vl_mc_buffer *buffer, unsigned mask)
 {
    struct vl_mc *renderer;
 
@@ -577,11 +581,9 @@ prepare_pipe_4_rendering(struct vl_mc_buffer *buffer)
    renderer->pipe->bind_rasterizer_state(renderer->pipe, renderer->rs_state);
 
    if (buffer->surface_cleared)
-      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add);
-   else {
-      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear);
-      buffer->surface_cleared = true;
-   }
+      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_add[mask]);
+   else
+      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_clear[mask]);
 
    renderer->pipe->set_framebuffer_state(renderer->pipe, &buffer->fb_state);
    renderer->pipe->set_viewport_state(renderer->pipe, &buffer->viewport);
@@ -594,7 +596,7 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref)
 
    assert(buffer && ref);
 
-   prepare_pipe_4_rendering(buffer);
+   prepare_pipe_4_rendering(buffer, PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B);
 
    renderer = buffer->renderer;
 
@@ -607,10 +609,13 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref)
    util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0,
                               renderer->buffer_width / MACROBLOCK_WIDTH *
                               renderer->buffer_height / MACROBLOCK_HEIGHT);
+
+   buffer->surface_cleared = true;
 }
 
 void
-vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned num_instances)
+vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source,
+                   unsigned component, unsigned num_instances)
 {
    struct vl_mc *renderer;
 
@@ -619,14 +624,14 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned num_instances)
    if (num_instances == 0)
       return;
 
-   prepare_pipe_4_rendering(buffer);
+   prepare_pipe_4_rendering(buffer, 1 << component);
 
    renderer = buffer->renderer;
 
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
 
-   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &buffer->source);
+   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &source);
    renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr);
 
    util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h
index bc2b0e7f149..353afa9df62 100644
--- a/src/gallium/auxiliary/vl/vl_mc.h
+++ b/src/gallium/auxiliary/vl/vl_mc.h
@@ -31,8 +31,11 @@
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
 
+#include "vl_defines.h"
 #include "vl_types.h"
 
+#define VL_MC_NUM_BLENDERS (1 << VL_MAX_PLANES)
+
 struct pipe_context;
 
 struct vl_mc
@@ -44,7 +47,8 @@ struct vl_mc
 
    void *rs_state;
 
-   void *blend_clear, *blend_add;
+   void *blend_clear[VL_MC_NUM_BLENDERS];
+   void *blend_add[VL_MC_NUM_BLENDERS];
    void *vs_ref, *vs_ycbcr;
    void *fs_ref, *fs_ycbcr;
    void *sampler_ref, *sampler_ycbcr;
@@ -58,8 +62,6 @@ struct vl_mc_buffer
 
    struct pipe_viewport_state viewport;
    struct pipe_framebuffer_state fb_state;
-
-   struct pipe_sampler_view *source;
 };
 
 bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
@@ -68,8 +70,7 @@ bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
 
 void vl_mc_cleanup(struct vl_mc *renderer);
 
-bool vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer,
-                       struct pipe_sampler_view *source);
+bool vl_mc_init_buffer(struct vl_mc *renderer, struct vl_mc_buffer *buffer);
 
 void vl_mc_cleanup_buffer(struct vl_mc_buffer *buffer);
 
@@ -77,6 +78,7 @@ void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface
 
 void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref);
 
-void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned num_instances);
+void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source,
+                        unsigned component, unsigned num_instances);
 
 #endif /* vl_mc_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 5027db4314e..296f46aba52 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -97,7 +97,7 @@ init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
    if (!buffer->zscan_source)
       goto error_source;
 
-   source = buffer->zscan_source->get_sampler_views(buffer->zscan_source);
+   source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source);
    if (!source)
       goto error_sampler;
 
@@ -174,11 +174,11 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
    if (!buffer->idct_intermediate)
       goto error_intermediate;
 
-   idct_source_sv = buffer->idct_source->get_sampler_views(buffer->idct_source);
+   idct_source_sv = buffer->idct_source->get_sampler_view_planes(buffer->idct_source);
    if (!idct_source_sv)
       goto error_source_sv;
 
-   idct_intermediate_sv = buffer->idct_intermediate->get_sampler_views(buffer->idct_intermediate);
+   idct_intermediate_sv = buffer->idct_intermediate->get_sampler_view_planes(buffer->idct_intermediate);
    if (!idct_intermediate_sv)
       goto error_intermediate_sv;
 
@@ -231,7 +231,6 @@ init_mc_buffer(struct vl_mpeg12_buffer *buf)
 {
    struct vl_mpeg12_decoder *dec;
    enum pipe_format formats[3];
-   struct pipe_sampler_view **mc_source_sv;
 
    assert(buf);
 
@@ -247,17 +246,13 @@ init_mc_buffer(struct vl_mpeg12_buffer *buf)
    if (!buf->mc_source)
       goto error_mc_source;
 
-   mc_source_sv = buf->mc_source->get_sampler_views(buf->mc_source);
-   if (!mc_source_sv)
-      goto error_mc_source_sv;
-
-   if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0], mc_source_sv[0]))
+   if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0]))
       goto error_mc_y;
 
-   if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[1], mc_source_sv[1]))
+   if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[1]))
       goto error_mc_cb;
 
-   if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[2], mc_source_sv[2]))
+   if(!vl_mc_init_buffer(&dec->mc_c, &buf->mc[2]))
       goto error_mc_cr;
 
    return true;
@@ -269,7 +264,6 @@ error_mc_cb:
    vl_mc_cleanup_buffer(&buf->mc[0]);
 
 error_mc_y:
-error_mc_source_sv:
    buf->mc_source->destroy(buf->mc_source);
 
 error_mc_source:
@@ -328,7 +322,7 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
 
    vl_vb_map(&buf->vertex_stream, dec->pipe);
 
-   sampler_views = buf->zscan_source->get_sampler_views(buf->zscan_source);
+   sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source);
 
    assert(sampler_views);
 
@@ -510,12 +504,13 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
    struct vl_mpeg12_decoder *dec;
 
-   struct pipe_sampler_view **sv[2];
+   struct pipe_sampler_view **sv[VL_MAX_REF_FRAMES], **mc_source_sv;
    struct pipe_surface **surfaces;
 
    struct pipe_vertex_buffer vb[3];
 
-   unsigned i, j;
+   unsigned i, j, component;
+   unsigned nr_components;
 
    assert(buf);
 
@@ -523,19 +518,21 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
    assert(dec);
 
    for (i = 0; i < 2; ++i)
-      sv[i] = refs[i] ? refs[i]->get_sampler_views(refs[i]) : NULL;
-
-   surfaces = dst->get_surfaces(dst);
+      sv[i] = refs[i] ? refs[i]->get_sampler_view_planes(refs[i]) : NULL;
 
    vb[0] = dec->quads;
    vb[1] = dec->pos;
 
+   surfaces = dst->get_surfaces(dst);
+
    dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
+      if (!surfaces[i]) continue;
+
       vl_mc_set_surface(&buf->mc[i], surfaces[i]);
 
-      for (j = 0; j < 2; ++j) {
-         if (sv[j] == NULL) continue;
+      for (j = 0; j < VL_MAX_REF_FRAMES; ++j) {
+         if (!sv[j]) continue;
 
          vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
          dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
@@ -546,7 +543,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
    dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      if (num_ycbcr_blocks[i] == 0) continue;
+      if (!num_ycbcr_blocks[i]) continue;
 
       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
       dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
@@ -555,8 +552,20 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
          vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_ycbcr_blocks[i]);
+   }
+
+   mc_source_sv = buf->mc_source->get_sampler_view_planes(buf->mc_source);
+   for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) {
+      if (!surfaces[i]) continue;
 
-      vl_mc_render_ycbcr(&buf->mc[i], num_ycbcr_blocks[i]);
+      nr_components = util_format_get_nr_components(surfaces[i]->texture->format);
+      for (j = 0; j < nr_components; ++j, ++component) {
+         if (!num_ycbcr_blocks[component]) continue; /* counts are indexed per component, not per plane */
+
+         vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
+         dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
+         vl_mc_render_ycbcr(&buf->mc[i], mc_source_sv[component], j, num_ycbcr_blocks[component]);
+      }
    }
 
    dec->pipe->flush(dec->pipe, fence);
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index dad8dd2c9ae..dccd7e93945 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -38,17 +38,6 @@
 
 #include "vl_video_buffer.h"
 
-static inline void
-adjust_swizzle(struct pipe_sampler_view *sv_templ)
-{
-   if (util_format_get_nr_components(sv_templ->format) == 1) {
-      sv_templ->swizzle_r = PIPE_SWIZZLE_RED;
-      sv_templ->swizzle_g = PIPE_SWIZZLE_RED;
-      sv_templ->swizzle_b = PIPE_SWIZZLE_RED;
-      sv_templ->swizzle_a = PIPE_SWIZZLE_RED;
-   }
-}
-
 static void
 vl_video_buffer_destroy(struct pipe_video_buffer *buffer)
 {
@@ -59,13 +48,14 @@ vl_video_buffer_destroy(struct pipe_video_buffer *buffer)
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
       pipe_surface_reference(&buf->surfaces[i], NULL);
-      pipe_sampler_view_reference(&buf->sampler_views[i], NULL);
+      pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL);
+      pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL);
       pipe_resource_reference(&buf->resources[i], NULL);
    }
 }
 
 static struct pipe_sampler_view **
-vl_video_buffer_sampler_views(struct pipe_video_buffer *buffer)
+vl_video_buffer_sampler_view_planes(struct pipe_video_buffer *buffer)
 {
    struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer;
    struct pipe_sampler_view sv_templ;
@@ -77,21 +67,63 @@ vl_video_buffer_sampler_views(struct pipe_video_buffer *buffer)
    pipe = buf->pipe;
 
    for (i = 0; i < buf->num_planes; ++i ) {
-      if (!buf->sampler_views[i]) {
+      if (!buf->sampler_view_planes[i]) {
          memset(&sv_templ, 0, sizeof(sv_templ));
          u_sampler_view_default_template(&sv_templ, buf->resources[i], buf->resources[i]->format);
-         adjust_swizzle(&sv_templ);
-         buf->sampler_views[i] = pipe->create_sampler_view(pipe, buf->resources[i], &sv_templ);
-         if (!buf->sampler_views[i])
+
+         if (util_format_get_nr_components(buf->resources[i]->format) == 1)
+            sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = sv_templ.swizzle_a = PIPE_SWIZZLE_RED;
+
+         buf->sampler_view_planes[i] = pipe->create_sampler_view(pipe, buf->resources[i], &sv_templ);
+         if (!buf->sampler_view_planes[i])
             goto error;
       }
    }
 
-   return buf->sampler_views;
+   return buf->sampler_view_planes;
 
 error:
    for (i = 0; i < buf->num_planes; ++i )
-      pipe_sampler_view_reference(&buf->sampler_views[i], NULL);
+      pipe_sampler_view_reference(&buf->sampler_view_planes[i], NULL);
+
+   return NULL;
+}
+
+static struct pipe_sampler_view **
+vl_video_buffer_sampler_view_components(struct pipe_video_buffer *buffer)
+{
+   struct vl_video_buffer *buf = (struct vl_video_buffer *)buffer;
+   struct pipe_sampler_view sv_templ;
+   struct pipe_context *pipe;
+   unsigned i, j, component;
+
+   assert(buf);
+
+   pipe = buf->pipe;
+
+   for (component = 0, i = 0; i < buf->num_planes; ++i ) {
+      unsigned nr_components = util_format_get_nr_components(buf->resources[i]->format);
+
+      for (j = 0; j < nr_components; ++j, ++component) {
+         assert(component < VL_MAX_PLANES);
+
+         if (!buf->sampler_view_components[component]) {
+            memset(&sv_templ, 0, sizeof(sv_templ));
+            u_sampler_view_default_template(&sv_templ, buf->resources[i], buf->resources[i]->format);
+            sv_templ.swizzle_r = sv_templ.swizzle_g = sv_templ.swizzle_b = PIPE_SWIZZLE_RED + j;
+            sv_templ.swizzle_a = PIPE_SWIZZLE_ONE;
+            buf->sampler_view_components[component] = pipe->create_sampler_view(pipe, buf->resources[i], &sv_templ);
+            if (!buf->sampler_view_components[component])
+               goto error;
+         }
+      }
+   }
+
+   return buf->sampler_view_components;
+
+error:
+   for (i = 0; i < VL_MAX_PLANES; ++i )
+      pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL);
 
    return NULL;
 }
@@ -145,7 +177,8 @@ vl_video_buffer_init(struct pipe_video_context *context,
    buffer = CALLOC_STRUCT(vl_video_buffer);
 
    buffer->base.destroy = vl_video_buffer_destroy;
-   buffer->base.get_sampler_views = vl_video_buffer_sampler_views;
+   buffer->base.get_sampler_view_planes = vl_video_buffer_sampler_view_planes;
+   buffer->base.get_sampler_view_components = vl_video_buffer_sampler_view_components;
    buffer->base.get_surfaces = vl_video_buffer_surfaces;
    buffer->pipe = pipe;
    buffer->num_planes = 1;
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
index 960acd28060..2dca74f641e 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.h
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -44,7 +44,8 @@ struct vl_video_buffer
    struct pipe_context      *pipe;
    unsigned                 num_planes;
    struct pipe_resource     *resources[VL_MAX_PLANES];
-   struct pipe_sampler_view *sampler_views[VL_MAX_PLANES];
+   struct pipe_sampler_view *sampler_view_planes[VL_MAX_PLANES];
+   struct pipe_sampler_view *sampler_view_components[VL_MAX_PLANES];
    struct pipe_surface      *surfaces[VL_MAX_PLANES];
 };
 
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 7e971641618..88d3ca1f4e4 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -235,7 +235,12 @@ struct pipe_video_buffer
    /**
     * get a individual sampler view for each plane
     */
-   struct pipe_sampler_view **(*get_sampler_views)(struct pipe_video_buffer *buffer);
+   struct pipe_sampler_view **(*get_sampler_view_planes)(struct pipe_video_buffer *buffer);
+
+   /**
+    * get an individual sampler view for each component
+    */
+   struct pipe_sampler_view **(*get_sampler_view_components)(struct pipe_video_buffer *buffer);
 
    /**
     * get a individual surfaces for each plane
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index cd2125fce63..c30cd07f434 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -176,7 +176,7 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
       return VDP_STATUS_NO_IMPLEMENTATION;
    }
 
-   sampler_views = p_surf->video_buffer->get_sampler_views(p_surf->video_buffer);
+   sampler_views = p_surf->video_buffer->get_sampler_view_planes(p_surf->video_buffer);
    if (!sampler_views)
       return VDP_STATUS_RESOURCES;
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 7429fdfcfb9..cfa15e120d9 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -306,7 +306,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
 
    surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder);
    surface_priv->mv_stride = surface_priv->decode_buffer->get_mv_stream_stride(surface_priv->decode_buffer);
-   surface_priv->video_buffer = vpipe->create_buffer(vpipe, PIPE_FORMAT_YV12, //TODO
+   surface_priv->video_buffer = vpipe->create_buffer(vpipe, PIPE_FORMAT_NV12,
                                                      context_priv->decoder->chroma_format,
                                                      context_priv->decoder->width,
                                                      context_priv->decoder->height);
-- 
cgit v1.2.3


From 104ac0066394f8246d18c833bca4bcce271b5eef Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 24 Apr 2011 20:08:11 +0200
Subject: [g3dvl] rework fence handling and add r600g workaround

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c        |  5 +----
 src/gallium/include/pipe/p_video_context.h          |  3 +--
 src/gallium/state_trackers/xorg/xvmc/surface.c      | 20 +++++++++++++++++---
 src/gallium/state_trackers/xorg/xvmc/xvmc_private.h |  4 +---
 4 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 296f46aba52..37789707a6b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -498,8 +498,7 @@ static void
 vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
                                unsigned num_ycbcr_blocks[3],
                                struct pipe_video_buffer *refs[2],
-                               struct pipe_video_buffer *dst,
-                               struct pipe_fence_handle **fence)
+                               struct pipe_video_buffer *dst)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer *)buffer;
    struct vl_mpeg12_decoder *dec;
@@ -567,8 +566,6 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
          vl_mc_render_ycbcr(&buf->mc[i], mc_source_sv[component], j, num_ycbcr_blocks[component]);
       }
    }
-
-   dec->pipe->flush(dec->pipe, fence);
 }
 
 static bool
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 88d3ca1f4e4..41f2e320580 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -159,8 +159,7 @@ struct pipe_video_decoder
    void (*flush_buffer)(struct pipe_video_decode_buffer *decbuf,
                         unsigned num_ycbcr_blocks[3],
                         struct pipe_video_buffer *ref_frames[2],
-                        struct pipe_video_buffer *dst,
-                        struct pipe_fence_handle **fence);
+                        struct pipe_video_buffer *dst);
 };
 
 /**
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index cfa15e120d9..fc4593dbc5f 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -275,8 +275,7 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
       context_priv->decoder->flush_buffer(surface->decode_buffer,
                                           num_ycbcr_blocks,
                                           ref_frames,
-                                          surface->video_buffer,
-                                          &surface->flush_fence);
+                                          surface->video_buffer);
       surface->mapped = 0;
    }
 }
@@ -537,7 +536,10 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
       subpicture_priv->surface = NULL;
    }
 
-   compositor->render_picture(compositor, PictureToPipe(flags), drawable_surface, &dst_rect, &surface_priv->disp_fence);
+   // Workaround for r600g, there seems to be a bug in the fence refcounting code
+   vpipe->screen->fence_reference(vpipe->screen, &surface_priv->fence, NULL);
+
+   compositor->render_picture(compositor, PictureToPipe(flags), drawable_surface, &dst_rect, &surface_priv->fence);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
 
@@ -572,6 +574,10 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 PUBLIC
 Status XvMCGetSurfaceStatus(Display *dpy, XvMCSurface *surface, int *status)
 {
+   struct pipe_video_context *vpipe;
+   XvMCSurfacePrivate *surface_priv;
+   XvMCContextPrivate *context_priv;
+
    assert(dpy);
 
    if (!surface)
@@ -579,8 +585,16 @@ Status XvMCGetSurfaceStatus(Display *dpy, XvMCSurface *surface, int *status)
 
    assert(status);
 
+   surface_priv = surface->privData;
+   context_priv = surface_priv->context->privData;
+   vpipe = context_priv->vctx->vpipe;
+
    *status = 0;
 
+   if (surface_priv->fence)
+      if (!vpipe->screen->fence_signalled(vpipe->screen, surface_priv->fence))
+         *status |= XVMC_RENDERING;
+
    return Success;
 }
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index bdcda3bb56f..262adac7ab6 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -83,9 +83,7 @@ typedef struct
       struct pipe_motionvector *mv;
    } ref[2];
 
-   struct pipe_fence_handle *flush_fence;
-   struct pipe_fence_handle *render_fence;
-   struct pipe_fence_handle *disp_fence;
+   struct pipe_fence_handle *fence;
 
    /* The subpicture associated with this surface, if any. */
    XvMCSubpicture *subpicture;
-- 
cgit v1.2.3


From 7f04fe5338d0846ec9a6003033da5357d2785c8b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 25 Apr 2011 01:20:15 +0200
Subject: [g3dvl] merge idct stage 2 and mc ycbcr stage into a single draw

---
 src/gallium/auxiliary/vl/vl_idct.c           | 312 +++++++++++++--------------
 src/gallium/auxiliary/vl/vl_idct.h           |  52 +++--
 src/gallium/auxiliary/vl/vl_mc.c             |  67 +++---
 src/gallium/auxiliary/vl/vl_mc.h             |  22 +-
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 145 +++++++++----
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |   4 +-
 6 files changed, 344 insertions(+), 258 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index ca3b1cb53ab..ebb4ad4fe0c 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -86,8 +86,54 @@ calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
    ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc);
 }
 
+static void
+increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
+               struct ureg_src saddr[2], bool right_side, bool transposed,
+               int pos, float size)
+{
+   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
+   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
+
+   /*
+    * daddr[0..1].(start) = saddr[0..1].(start)
+    * daddr[0..1].(tc) = saddr[0..1].(tc)
+    */
+
+   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
+   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
+   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
+   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
+}
+
+static void
+fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler)
+{
+   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler);
+   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler);
+}
+
+static void
+matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
+{
+   struct ureg_dst tmp;
+
+   tmp = ureg_DECL_temporary(shader);
+
+   /*
+    * tmp.xy = dot4(m[0][0..1], m[1][0..1])
+    * dst = tmp.x + tmp.y
+    */
+   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
+   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
+   ureg_ADD(shader, dst,
+      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
+      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
+
+   ureg_release_temporary(shader, tmp);
+}
+
 static void *
-create_vert_shader(struct vl_idct *idct, bool matrix_stage)
+create_stage1_vert_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
    struct ureg_src vrect, vpos;
@@ -99,12 +145,12 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
    if (!shader)
       return NULL;
 
-   t_tex = ureg_DECL_temporary(shader);
-   t_start = ureg_DECL_temporary(shader);
-
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
 
+   t_tex = ureg_DECL_temporary(shader);
+   t_start = ureg_DECL_temporary(shader);
+
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
 
    o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
@@ -131,22 +177,17 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
 
    ureg_ADD(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, vrect);
    ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), ureg_src(t_tex), scale);
-   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
-      ureg_scalar(vrect, TGSI_SWIZZLE_X),
-      ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets));
 
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
 
+   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
+      ureg_scalar(vrect, TGSI_SWIZZLE_X),
+      ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets));
    ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
 
-   if(matrix_stage) {
-      calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
-      calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
-   } else {
-      calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
-      calc_addr(shader, o_r_addr, ureg_src(t_tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
-   }
+   calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
+   calc_addr(shader, o_r_addr, vrect, ureg_imm1f(shader, 0.0f), true, true, BLOCK_WIDTH / 4);
 
    ureg_release_temporary(shader, t_tex);
    ureg_release_temporary(shader, t_start);
@@ -156,54 +197,8 @@ create_vert_shader(struct vl_idct *idct, bool matrix_stage)
    return ureg_create_shader_and_destroy(shader, idct->pipe);
 }
 
-static void
-increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
-               struct ureg_src saddr[2], bool right_side, bool transposed,
-               int pos, float size)
-{
-   unsigned wm_start = (right_side == transposed) ? TGSI_WRITEMASK_X : TGSI_WRITEMASK_Y;
-   unsigned wm_tc = (right_side == transposed) ? TGSI_WRITEMASK_Y : TGSI_WRITEMASK_X;
-
-   /*
-    * daddr[0..1].(start) = saddr[0..1].(start)
-    * daddr[0..1].(tc) = saddr[0..1].(tc)
-    */
-
-   ureg_MOV(shader, ureg_writemask(daddr[0], wm_start), saddr[0]);
-   ureg_ADD(shader, ureg_writemask(daddr[0], wm_tc), saddr[0], ureg_imm1f(shader, pos / size));
-   ureg_MOV(shader, ureg_writemask(daddr[1], wm_start), saddr[1]);
-   ureg_ADD(shader, ureg_writemask(daddr[1], wm_tc), saddr[1], ureg_imm1f(shader, pos / size));
-}
-
-static void
-fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler)
-{
-   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler);
-   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler);
-}
-
-static void
-matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2], struct ureg_dst r[2])
-{
-   struct ureg_dst tmp;
-
-   tmp = ureg_DECL_temporary(shader);
-
-   /*
-    * tmp.xy = dot4(m[0][0..1], m[1][0..1])
-    * dst = tmp.x + tmp.y
-    */
-   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(l[0]), ureg_src(r[0]));
-   ureg_DP4(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(l[1]), ureg_src(r[1]));
-   ureg_ADD(shader, dst,
-      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X),
-      ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
-
-   ureg_release_temporary(shader, tmp);
-}
-
 static void *
-create_matrix_frag_shader(struct vl_idct *idct)
+create_stage1_frag_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
 
@@ -272,25 +267,56 @@ create_matrix_frag_shader(struct vl_idct *idct)
    return ureg_create_shader_and_destroy(shader, idct->pipe);
 }
 
-static void *
-create_transpose_frag_shader(struct vl_idct *idct)
+void
+vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
+                           unsigned first_output, struct ureg_dst tex)
 {
-   struct ureg_program *shader;
+   struct ureg_src vrect, vpos;
+   struct ureg_src scale;
+   struct ureg_dst t_start;
+   struct ureg_dst o_l_addr[2], o_r_addr[2];
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
 
+   t_start = ureg_DECL_temporary(shader);
+
+   --first_output;
+
+   o_l_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR0);
+   o_l_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_L_ADDR1);
+
+   o_r_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR0);
+   o_r_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output + VS_O_R_ADDR1);
+
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / idct->buffer_width,
+      (float)BLOCK_HEIGHT / idct->buffer_height);
+
+   ureg_MUL(shader, ureg_writemask(tex, TGSI_WRITEMASK_Z),
+      ureg_scalar(vrect, TGSI_SWIZZLE_X),
+      ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets));
+   ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
+
+   calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
+   calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
+}
+
+void
+vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
+                           unsigned first_input, struct ureg_dst fragment)
+{
    struct ureg_src l_addr[2], r_addr[2];
 
    struct ureg_dst l[2], r[2];
-   struct ureg_dst fragment;
 
-   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
-   if (!shader)
-      return NULL;
+   --first_input;
 
-   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
-   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+   l_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   l_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
 
-   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
-   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
+   r_addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   r_addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input + VS_O_R_ADDR1, TGSI_INTERPOLATE_LINEAR);
 
    l[0] = ureg_DECL_temporary(shader);
    l[1] = ureg_DECL_temporary(shader);
@@ -300,61 +326,39 @@ create_transpose_frag_shader(struct vl_idct *idct)
    fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0));
    fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1));
 
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
-
-   matrix_mul(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X), l, r);
+   matrix_mul(shader, fragment, l, r);
 
    ureg_release_temporary(shader, l[0]);
    ureg_release_temporary(shader, l[1]);
    ureg_release_temporary(shader, r[0]);
    ureg_release_temporary(shader, r[1]);
-
-   ureg_END(shader);
-
-   return ureg_create_shader_and_destroy(shader, idct->pipe);
 }
 
 static bool
 init_shaders(struct vl_idct *idct)
 {
-   idct->matrix_vs = create_vert_shader(idct, true);
-   if (!idct->matrix_vs)
-      goto error_matrix_vs;
-
-   idct->matrix_fs = create_matrix_frag_shader(idct);
-   if (!idct->matrix_fs)
-      goto error_matrix_fs;
+   idct->vs = create_stage1_vert_shader(idct);
+   if (!idct->vs)
+      goto error_vs;
 
-   idct->transpose_vs = create_vert_shader(idct, false);
-   if (!idct->transpose_vs)
-      goto error_transpose_vs;
-
-   idct->transpose_fs = create_transpose_frag_shader(idct);
-   if (!idct->transpose_fs)
-      goto error_transpose_fs;
+   idct->fs = create_stage1_frag_shader(idct);
+   if (!idct->fs)
+      goto error_fs;
 
    return true;
 
-error_transpose_fs:
-   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
-
-error_transpose_vs:
-   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
-
-error_matrix_fs:
-   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
+error_fs:
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
 
-error_matrix_vs:
+error_vs:
    return false;
 }
 
 static void
 cleanup_shaders(struct vl_idct *idct)
 {
-   idct->pipe->delete_vs_state(idct->pipe, idct->matrix_vs);
-   idct->pipe->delete_fs_state(idct->pipe, idct->matrix_fs);
-   idct->pipe->delete_vs_state(idct->pipe, idct->transpose_vs);
-   idct->pipe->delete_fs_state(idct->pipe, idct->transpose_fs);
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs);
+   idct->pipe->delete_fs_state(idct->pipe, idct->fs);
 }
 
 static bool
@@ -447,30 +451,30 @@ init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    tex = buffer->sampler_views.individual.intermediate->texture;
 
-   buffer->fb_state[0].width = tex->width0;
-   buffer->fb_state[0].height = tex->height0;
-   buffer->fb_state[0].nr_cbufs = idct->nr_of_render_targets;
+   buffer->fb_state.width = tex->width0;
+   buffer->fb_state.height = tex->height0;
+   buffer->fb_state.nr_cbufs = idct->nr_of_render_targets;
    for(i = 0; i < idct->nr_of_render_targets; ++i) {
       memset(&surf_templ, 0, sizeof(surf_templ));
       surf_templ.format = tex->format;
       surf_templ.u.tex.first_layer = i;
       surf_templ.u.tex.last_layer = i;
       surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-      buffer->fb_state[0].cbufs[i] = idct->pipe->create_surface(
+      buffer->fb_state.cbufs[i] = idct->pipe->create_surface(
          idct->pipe, tex, &surf_templ);
 
-      if (!buffer->fb_state[0].cbufs[i])
+      if (!buffer->fb_state.cbufs[i])
          goto error_surfaces;
    }
 
-   buffer->viewport[0].scale[0] = tex->width0;
-   buffer->viewport[0].scale[1] = tex->height0;
+   buffer->viewport.scale[0] = tex->width0;
+   buffer->viewport.scale[1] = tex->height0;
 
    return true;
 
 error_surfaces:
    for(i = 0; i < idct->nr_of_render_targets; ++i)
-      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
+      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
 
    return false;
 }
@@ -483,7 +487,7 @@ cleanup_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    assert(idct && buffer);
 
    for(i = 0; i < idct->nr_of_render_targets; ++i)
-      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
+      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
 
    pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, NULL);
 }
@@ -607,13 +611,13 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
                     struct pipe_sampler_view *intermediate,
                     struct pipe_surface *destination)
 {
-   unsigned i;
-
    assert(buffer);
    assert(idct);
    assert(source);
    assert(destination);
 
+   memset(buffer, 0, sizeof(struct vl_idct_buffer));
+
    pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
@@ -622,25 +626,12 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
    if (!init_intermediate(idct, buffer))
       return false;
 
-   /* init state */
-   buffer->fb_state[1].width = destination->texture->width0;
-   buffer->fb_state[1].height = destination->texture->height0;
-   buffer->fb_state[1].nr_cbufs = 1;
-   pipe_surface_reference(&buffer->fb_state[1].cbufs[0], destination);
-
-   buffer->viewport[1].scale[0] = destination->texture->width0;
-   buffer->viewport[1].scale[1] = destination->texture->height0;
-
-   for(i = 0; i < 2; ++i) {
-      buffer->viewport[i].scale[2] = 1;
-      buffer->viewport[i].scale[3] = 1;
-      buffer->viewport[i].translate[0] = 0;
-      buffer->viewport[i].translate[1] = 0;
-      buffer->viewport[i].translate[2] = 0;
-      buffer->viewport[i].translate[3] = 0;
-
-      buffer->fb_state[i].zsbuf = NULL;
-   }
+   buffer->viewport.scale[2] = 1;
+   buffer->viewport.scale[3] = 1;
+   buffer->viewport.translate[0] = 0;
+   buffer->viewport.translate[1] = 0;
+   buffer->viewport.translate[2] = 0;
+   buffer->viewport.translate[3] = 0;
 
    return true;
 }
@@ -653,9 +644,7 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
    assert(idct && buffer);
 
    for(i = 0; i < idct->nr_of_render_targets; ++i)
-      pipe_surface_reference(&buffer->fb_state[0].cbufs[i], NULL);
-
-   pipe_surface_reference(&buffer->fb_state[1].cbufs[0], NULL);
+      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
 
    cleanup_intermediate(idct, buffer);
 }
@@ -666,25 +655,28 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
    assert(idct);
    assert(buffer);
 
-   if(num_instances > 0) {
-      idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
-      idct->pipe->bind_blend_state(idct->pipe, idct->blend);
-      idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
-
-      /* first stage */
-      idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[0]);
-      idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[0]);
-      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
-      idct->pipe->bind_vs_state(idct->pipe, idct->matrix_vs);
-      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
-      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
-
-      /* second stage */
-      idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state[1]);
-      idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport[1]);
-      idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
-      idct->pipe->bind_vs_state(idct->pipe, idct->transpose_vs);
-      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
-      util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
-   }
+   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
+   idct->pipe->bind_blend_state(idct->pipe, idct->blend);
+   idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
+
+   /* first stage */
+   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
+   idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport);
+   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
+   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+   idct->pipe->bind_fs_state(idct->pipe, idct->fs);
+   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
+}
+
+void
+vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   assert(idct);
+   assert(buffer);
+
+   /* second stage */
+   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
+   idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
+   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
 }
+
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 96933b9d889..f5a1e5d9b73 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -30,6 +30,8 @@
 
 #include <pipe/p_state.h>
 
+#include <tgsi/tgsi_ureg.h>
+
 /* shader based inverse distinct cosinus transformation
  * expect usage of vl_vertex_buffers as a todo list
  */
@@ -46,8 +48,7 @@ struct vl_idct
 
    void *samplers[2];
 
-   void *matrix_vs, *transpose_vs;
-   void *matrix_fs, *transpose_fs;
+   void *vs, *fs;
 
    struct pipe_sampler_view *matrix;
    struct pipe_sampler_view *transpose;
@@ -56,8 +57,8 @@ struct vl_idct
 /* a set of buffers to work with */
 struct vl_idct_buffer
 {
-   struct pipe_viewport_state viewport[2];
-   struct pipe_framebuffer_state fb_state[2];
+   struct pipe_viewport_state viewport;
+   struct pipe_framebuffer_state fb_state;
 
    union
    {
@@ -71,28 +72,45 @@ struct vl_idct_buffer
 };
 
 /* upload the idct matrix, which can be shared by all idct instances of a pipe */
-struct pipe_sampler_view *vl_idct_upload_matrix(struct pipe_context *pipe, float scale);
+struct pipe_sampler_view *
+vl_idct_upload_matrix(struct pipe_context *pipe, float scale);
+
+void
+vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
+                           unsigned first_output, struct ureg_dst tex);
+
+void
+vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
+                           unsigned first_input, struct ureg_dst fragment);
 
 /* init an idct instance */
-bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
-                  unsigned buffer_width, unsigned buffer_height,
-                  unsigned nr_of_render_targets,
-                  struct pipe_sampler_view *matrix,
-                  struct pipe_sampler_view *transpose);
+bool
+vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
+             unsigned buffer_width, unsigned buffer_height,
+             unsigned nr_of_render_targets,
+             struct pipe_sampler_view *matrix,
+             struct pipe_sampler_view *transpose);
 
 /* destroy an idct instance */
-void vl_idct_cleanup(struct vl_idct *idct);
+void
+vl_idct_cleanup(struct vl_idct *idct);
 
 /* init a buffer assosiated with agiven idct instance */
-bool vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
-                         struct pipe_sampler_view *source,
-                         struct pipe_sampler_view *intermediate,
-                         struct pipe_surface *destination);
+bool
+vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
+                    struct pipe_sampler_view *source,
+                    struct pipe_sampler_view *intermediate,
+                    struct pipe_surface *destination);
 
 /* cleanup a buffer of an idct instance */
-void vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
+void
+vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
 /* flush the buffer and start rendering, vertex buffers needs to be setup before calling this */
-void vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_verts);
+void
+vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_verts);
+
+void
+vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer);
 
 #endif
diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index ecdce6b28bd..7474c58250d 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -37,12 +37,16 @@
 #include "vl_defines.h"
 #include "vl_vertex_buffers.h"
 #include "vl_mc.h"
+#include "vl_idct.h"
 
 enum VS_OUTPUT
 {
    VS_O_VPOS,
    VS_O_VTOP,
-   VS_O_VBOTTOM
+   VS_O_VBOTTOM,
+
+   VS_O_FLAGS = VS_O_VTOP,
+   VS_O_VTEX = VS_O_VBOTTOM
 };
 
 static struct ureg_dst
@@ -220,13 +224,13 @@ create_ref_frag_shader(struct vl_mc *r)
 }
 
 static void *
-create_ycbcr_vert_shader(struct vl_mc *r)
+create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, void *callback_priv)
 {
    struct ureg_program *shader;
 
    struct ureg_src vrect, vpos;
    struct ureg_dst t_vpos, t_vtex;
-   struct ureg_dst o_vpos, o_vtex;
+   struct ureg_dst o_vpos, o_flags;
 
    struct vertex2f scale = {
       (float)BLOCK_WIDTH / r->buffer_width * MACROBLOCK_WIDTH / r->macroblock_size,
@@ -246,11 +250,11 @@ create_ycbcr_vert_shader(struct vl_mc *r)
    t_vtex = ureg_DECL_temporary(shader);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
-   o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP);
+   o_flags = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS);
 
    /*
     * o_vtex.xy = t_vpos
-    * o_vtex.z = intra * 0.5
+    * o_flags.z = intra * 0.5
     *
     * if(interlaced) {
     *    t_vtex.xy = vrect.y ? { 0, scale.y } : { -scale.y : 0 }
@@ -258,14 +262,16 @@ create_ycbcr_vert_shader(struct vl_mc *r)
     *    t_vtex.y = t_vtex.z ? t_vtex.x : t_vtex.y
     *    o_vpos.y = t_vtex.y + t_vpos.y
     *
-    *    o_vtex.w = t_vtex.z ? 0 : 1
+    *    o_flags.w = t_vtex.z ? 0 : 1
     * }
     *
     */
-   ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(t_vpos));
-   ureg_MUL(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_Z),
+
+   vs_callback(callback_priv, r, shader, VS_O_VTEX, t_vpos);
+
+   ureg_MUL(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_Z),
             ureg_scalar(vpos, TGSI_SWIZZLE_Z), ureg_imm1f(shader, 0.5f));
-   ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f));
+   ureg_MOV(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W), ureg_imm1f(shader, -1.0f));
 
    if (r->macroblock_size == MACROBLOCK_HEIGHT) { //TODO
       ureg_IF(shader, ureg_scalar(vpos, TGSI_SWIZZLE_W), &label);
@@ -286,7 +292,7 @@ create_ycbcr_vert_shader(struct vl_mc *r)
          ureg_ADD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_Y),
                   ureg_src(t_vpos), ureg_src(t_vtex));
 
-         ureg_CMP(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_W),
+         ureg_CMP(shader, ureg_writemask(o_flags, TGSI_WRITEMASK_W),
                   ureg_negate(ureg_scalar(ureg_src(t_vtex), TGSI_SWIZZLE_Z)),
                   ureg_imm1f(shader, 0.0f), ureg_imm1f(shader, 1.0f));
 
@@ -303,10 +309,10 @@ create_ycbcr_vert_shader(struct vl_mc *r)
 }
 
 static void *
-create_ycbcr_frag_shader(struct vl_mc *r, float scale)
+create_ycbcr_frag_shader(struct vl_mc *r, float scale, vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv)
 {
    struct ureg_program *shader;
-   struct ureg_src tc, sampler;
+   struct ureg_src flags;
    struct ureg_dst tmp;
    struct ureg_dst fragment;
    unsigned label;
@@ -315,9 +321,7 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale)
    if (!shader)
       return NULL;
 
-   tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTOP, TGSI_INTERPOLATE_LINEAR);
-
-   sampler = ureg_DECL_sampler(shader, 0);
+   flags = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_FLAGS, TGSI_INTERPOLATE_LINEAR);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -333,7 +337,7 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale)
     */
 
    ureg_SEQ(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y),
-            ureg_scalar(tc, TGSI_SWIZZLE_W), ureg_src(tmp));
+            ureg_scalar(flags, TGSI_SWIZZLE_W), ureg_src(tmp));
 
    ureg_IF(shader, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), &label);
 
@@ -342,15 +346,15 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale)
    ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
    ureg_ELSE(shader, &label);
 
-      ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc, sampler);
+      fs_callback(callback_priv, r, shader, VS_O_VTEX, tmp);
 
       if (scale != 1.0f)
          ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
                   ureg_src(tmp), ureg_imm1f(shader, scale),
-                  ureg_scalar(tc, TGSI_SWIZZLE_Z));
+                  ureg_scalar(flags, TGSI_SWIZZLE_Z));
       else
          ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
-                  ureg_src(tmp), ureg_scalar(tc, TGSI_SWIZZLE_Z));
+                  ureg_src(tmp), ureg_scalar(flags, TGSI_SWIZZLE_Z));
 
       ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
 
@@ -386,12 +390,6 @@ init_pipe_state(struct vl_mc *r)
    if (!r->sampler_ref)
       goto error_sampler_ref;
 
-   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
-   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
-   r->sampler_ycbcr = r->pipe->create_sampler_state(r->pipe, &sampler);
-   if (!r->sampler_ycbcr)
-      goto error_sampler_ycbcr;
-
    for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) {
       memset(&blend, 0, sizeof blend);
       blend.independent_blend_enable = 0;
@@ -442,9 +440,6 @@ error_blend:
    r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
 
 error_sampler_ref:
-   r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr);
-
-error_sampler_ycbcr:
    return false;
 }
 
@@ -456,7 +451,6 @@ cleanup_pipe_state(struct vl_mc *r)
    assert(r);
 
    r->pipe->delete_sampler_state(r->pipe, r->sampler_ref);
-   r->pipe->delete_sampler_state(r->pipe, r->sampler_ycbcr);
    for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) {
       r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]);
       r->pipe->delete_blend_state(r->pipe, r->blend_add[i]);
@@ -467,7 +461,10 @@ cleanup_pipe_state(struct vl_mc *r)
 bool
 vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
            unsigned buffer_width, unsigned buffer_height,
-           unsigned macroblock_size, float scale)
+           unsigned macroblock_size, float scale,
+           vl_mc_ycbcr_vert_shader vs_callback,
+           vl_mc_ycbcr_frag_shader fs_callback,
+           void *callback_priv)
 {
    assert(renderer);
    assert(pipe);
@@ -486,7 +483,7 @@ vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
    if (!renderer->vs_ref)
       goto error_vs_ref;
 
-   renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer);
+   renderer->vs_ycbcr = create_ycbcr_vert_shader(renderer, vs_callback, callback_priv);
    if (!renderer->vs_ycbcr)
       goto error_vs_ycbcr;
 
@@ -494,7 +491,7 @@ vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
    if (!renderer->fs_ref)
       goto error_fs_ref;
 
-   renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale);
+   renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale, fs_callback, callback_priv);
    if (!renderer->fs_ycbcr)
       goto error_fs_ycbcr;
 
@@ -614,8 +611,7 @@ vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref)
 }
 
 void
-vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source,
-                   unsigned component, unsigned num_instances)
+vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances)
 {
    struct vl_mc *renderer;
 
@@ -631,8 +627,5 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source
    renderer->pipe->bind_vs_state(renderer->pipe, renderer->vs_ycbcr);
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
 
-   renderer->pipe->set_fragment_sampler_views(renderer->pipe, 1, &source);
-   renderer->pipe->bind_fragment_sampler_states(renderer->pipe, 1, &renderer->sampler_ycbcr);
-
    util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
 }
diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h
index 353afa9df62..85ec69b3ce7 100644
--- a/src/gallium/auxiliary/vl/vl_mc.h
+++ b/src/gallium/auxiliary/vl/vl_mc.h
@@ -31,6 +31,8 @@
 #include <pipe/p_state.h>
 #include <pipe/p_video_state.h>
 
+#include <tgsi/tgsi_ureg.h>
+
 #include "vl_defines.h"
 #include "vl_types.h"
 
@@ -51,7 +53,7 @@ struct vl_mc
    void *blend_add[VL_MC_NUM_BLENDERS];
    void *vs_ref, *vs_ycbcr;
    void *fs_ref, *fs_ycbcr;
-   void *sampler_ref, *sampler_ycbcr;
+   void *sampler_ref;
 };
 
 struct vl_mc_buffer
@@ -64,9 +66,22 @@ struct vl_mc_buffer
    struct pipe_framebuffer_state fb_state;
 };
 
+typedef void (*vl_mc_ycbcr_vert_shader)(void *priv, struct vl_mc *mc,
+                                        struct ureg_program *shader,
+                                        unsigned first_output,
+                                        struct ureg_dst tex);
+
+typedef void (*vl_mc_ycbcr_frag_shader)(void *priv, struct vl_mc *mc,
+                                        struct ureg_program *shader,
+                                        unsigned first_input,
+                                        struct ureg_dst dst);
+
 bool vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
                 unsigned picture_width, unsigned picture_height,
-                unsigned macroblock_size, float scale);
+                unsigned macroblock_size, float scale,
+                vl_mc_ycbcr_vert_shader vs_callback,
+                vl_mc_ycbcr_frag_shader fs_callback,
+                void *callback_priv);
 
 void vl_mc_cleanup(struct vl_mc *renderer);
 
@@ -78,7 +93,6 @@ void vl_mc_set_surface(struct vl_mc_buffer *buffer, struct pipe_surface *surface
 
 void vl_mc_render_ref(struct vl_mc_buffer *buffer, struct pipe_sampler_view *ref);
 
-void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, struct pipe_sampler_view *source,
-                        unsigned component, unsigned num_instances);
+void vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances);
 
 #endif /* vl_mc_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 37789707a6b..74ec4b1db7b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -145,7 +145,7 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 {
    enum pipe_format formats[3];
 
-   struct pipe_sampler_view **idct_source_sv, **idct_intermediate_sv;
+   struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
    struct pipe_surface **idct_surfaces;
 
    struct vl_mpeg12_decoder *dec;
@@ -164,23 +164,23 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
    if (!buffer->idct_source)
       goto error_source;
 
-   formats[0] = formats[1] = formats[2] = dec->idct_intermediate_format;
-   buffer->idct_intermediate = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                                    dec->base.width / dec->nr_of_idct_render_targets,
-                                                    dec->base.height / 4, dec->nr_of_idct_render_targets,
-                                                    dec->base.chroma_format,
-                                                    formats, PIPE_USAGE_STATIC);
+   formats[0] = formats[1] = formats[2] = dec->mc_source_format;
+   buffer->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                            dec->base.width / dec->nr_of_idct_render_targets,
+                                            dec->base.height / 4, dec->nr_of_idct_render_targets,
+                                            dec->base.chroma_format,
+                                            formats, PIPE_USAGE_STATIC);
 
-   if (!buffer->idct_intermediate)
-      goto error_intermediate;
+   if (!buffer->mc_source)
+      goto error_mc_source;
 
    idct_source_sv = buffer->idct_source->get_sampler_view_planes(buffer->idct_source);
    if (!idct_source_sv)
       goto error_source_sv;
 
-   idct_intermediate_sv = buffer->idct_intermediate->get_sampler_view_planes(buffer->idct_intermediate);
-   if (!idct_intermediate_sv)
-      goto error_intermediate_sv;
+   mc_source_sv = buffer->mc_source->get_sampler_view_planes(buffer->mc_source);
+   if (!mc_source_sv)
+      goto error_mc_source_sv;
 
    idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
    if (!idct_surfaces)
@@ -189,7 +189,7 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
    for (i = 0; i < 3; ++i)
       if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
                                &buffer->idct[i], idct_source_sv[i],
-                               idct_intermediate_sv[i], idct_surfaces[i]))
+                               mc_source_sv[i], idct_surfaces[i]))
          goto error_plane;
 
    return true;
@@ -199,11 +199,11 @@ error_plane:
       vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
 
 error_surfaces:
-error_intermediate_sv:
+error_mc_source_sv:
 error_source_sv:
-   buffer->idct_intermediate->destroy(buffer->idct_intermediate);
+   buffer->mc_source->destroy(buffer->mc_source);
 
-error_intermediate:
+error_mc_source:
    buffer->idct_source->destroy(buffer->idct_source);
 
 error_source:
@@ -223,7 +223,6 @@ cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
    buf->idct_source->destroy(buf->idct_source);
-   buf->idct_intermediate->destroy(buf->idct_intermediate);
 }
 
 static bool
@@ -237,14 +236,16 @@ init_mc_buffer(struct vl_mpeg12_buffer *buf)
    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
    assert(dec);
 
-   formats[0] = formats[1] = formats[2] =dec->mc_source_format;
-   buf->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                            dec->base.width, dec->base.height, 1,
-                                            dec->base.chroma_format,
-                                            formats, PIPE_USAGE_STATIC);
+   if (dec->base.entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      formats[0] = formats[1] = formats[2] =dec->mc_source_format;
+      buf->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                               dec->base.width, dec->base.height, 1,
+                                               dec->base.chroma_format,
+                                               formats, PIPE_USAGE_STATIC);
 
-   if (!buf->mc_source)
-      goto error_mc_source;
+      if (!buf->mc_source)
+         goto error_mc_source;
+   }
 
    if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0]))
       goto error_mc_y;
@@ -420,6 +421,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    dec->pipe->bind_fs_state(dec->pipe, NULL);
 
    dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa);
+   dec->pipe->delete_sampler_state(dec->pipe, dec->sampler_ycbcr);
 
    vl_mc_cleanup(&dec->mc_y);
    vl_mc_cleanup(&dec->mc_c);
@@ -563,7 +565,14 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
          vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
          dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
-         vl_mc_render_ycbcr(&buf->mc[i], mc_source_sv[component], j, num_ycbcr_blocks[component]);
+
+         if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
+            vl_idct_prepare_stage2(component == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[component]);
+         else {
+            dec->pipe->set_fragment_sampler_views(dec->pipe, 1, &mc_source_sv[component]);
+            dec->pipe->bind_fragment_sampler_states(dec->pipe, 1, &dec->sampler_ycbcr);
+         }
+         vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]);
       }
    }
 }
@@ -572,6 +581,7 @@ static bool
 init_pipe_state(struct vl_mpeg12_decoder *dec)
 {
    struct pipe_depth_stencil_alpha_state dsa;
+   struct pipe_sampler_state sampler;
    unsigned i;
 
    assert(dec);
@@ -595,6 +605,20 @@ init_pipe_state(struct vl_mpeg12_decoder *dec)
    dec->dsa = dec->pipe->create_depth_stencil_alpha_state(dec->pipe, &dsa);
    dec->pipe->bind_depth_stencil_alpha_state(dec->pipe, dec->dsa);
 
+   memset(&sampler, 0, sizeof(sampler));
+   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
+   sampler.compare_func = PIPE_FUNC_ALWAYS;
+   sampler.normalized_coords = 1;
+   dec->sampler_ycbcr = dec->pipe->create_sampler_state(dec->pipe, &sampler);
+   if (!dec->sampler_ycbcr)
+      return false;
+
    return true;
 }
 
@@ -675,10 +699,10 @@ init_idct(struct vl_mpeg12_decoder *dec)
    if (dec->idct_source_format == PIPE_FORMAT_NONE)
       return false;
 
-   dec->idct_intermediate_format = find_first_supported_format(dec, const_idct_intermediate_formats,
-                                                               num_idct_intermediate_formats, PIPE_TEXTURE_3D);
+   dec->mc_source_format = find_first_supported_format(dec, const_idct_intermediate_formats,
+                                                       num_idct_intermediate_formats, PIPE_TEXTURE_3D);
 
-   if (dec->idct_intermediate_format == PIPE_FORMAT_NONE)
+   if (dec->mc_source_format == PIPE_FORMAT_NONE)
       return false;
 
    switch (dec->idct_source_format) {
@@ -695,8 +719,8 @@ init_idct(struct vl_mpeg12_decoder *dec)
       return false;
    }
 
-   if (dec->idct_intermediate_format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
-       dec->idct_intermediate_format == PIPE_FORMAT_R32G32B32A32_FLOAT)
+   if (dec->mc_source_format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
+       dec->mc_source_format == PIPE_FORMAT_R32G32B32A32_FLOAT)
       transpose_scale = 1.0f;
    else
       transpose_scale = matrix_scale = sqrt(matrix_scale);
@@ -738,6 +762,49 @@ error_matrix:
    return false;
 }
 
+static void
+mc_vert_shader_callback(void *priv, struct vl_mc *mc,
+                        struct ureg_program *shader,
+                        unsigned first_output,
+                        struct ureg_dst tex)
+{
+   struct vl_mpeg12_decoder *dec = priv;
+   struct ureg_dst o_vtex;
+
+   assert(priv && mc);
+   assert(shader);
+
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c;
+      vl_idct_stage2_vert_shader(idct, shader, first_output, tex);
+   } else {
+      o_vtex = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, first_output);
+      ureg_MOV(shader, ureg_writemask(o_vtex, TGSI_WRITEMASK_XY), ureg_src(tex));
+   }
+}
+
+static void
+mc_frag_shader_callback(void *priv, struct vl_mc *mc,
+                        struct ureg_program *shader,
+                        unsigned first_input,
+                        struct ureg_dst dst)
+{
+   struct vl_mpeg12_decoder *dec = priv;
+   struct ureg_src src, sampler;
+
+   assert(priv && mc);
+   assert(shader);
+
+   if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
+      struct vl_idct *idct = mc == &dec->mc_y ? &dec->idct_y : &dec->idct_c;
+      vl_idct_stage2_frag_shader(idct, shader, first_input, dst);
+   } else {
+      src = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, first_input, TGSI_INTERPOLATE_LINEAR);
+      sampler = ureg_DECL_sampler(shader, 0);
+      ureg_TEX(shader, dst, TGSI_TEXTURE_2D, src, sampler);
+   }
+}
+
 struct pipe_video_decoder *
 vl_create_mpeg12_decoder(struct pipe_video_context *context,
                          struct pipe_context *pipe,
@@ -785,12 +852,6 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    /* TODO: Implement 422, 444 */
    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
-   dec->mc_source_format = find_first_supported_format(dec, const_mc_source_formats,
-                                                       num_mc_source_formats, PIPE_TEXTURE_3D);
-
-   if (dec->mc_source_format == PIPE_FORMAT_NONE)
-      return NULL;
-
    if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
       dec->chroma_width = dec->base.width / 2;
       dec->chroma_height = dec->base.height / 2;
@@ -813,6 +874,12 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
       else
          mc_scale = 1.0f;
    } else {
+      dec->mc_source_format = find_first_supported_format(dec, const_mc_source_formats,
+                                                          num_mc_source_formats, PIPE_TEXTURE_3D);
+
+      if (dec->mc_source_format == PIPE_FORMAT_NONE)
+         return NULL;
+
       switch (dec->mc_source_format) {
       case PIPE_FORMAT_R16_SNORM:
          mc_scale = SCALE_FACTOR_SNORM;
@@ -828,11 +895,13 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
       }
    }
 
-   if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, mc_scale))
+   if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, mc_scale,
+                   mc_vert_shader_callback, mc_frag_shader_callback, dec))
       goto error_mc_y;
 
    // TODO
-   if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, mc_scale))
+   if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, mc_scale,
+                   mc_vert_shader_callback, mc_frag_shader_callback, dec))
       goto error_mc_c;
 
    if (!init_pipe_state(dec))
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 9d5768816fb..e483ace03b4 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -53,7 +53,6 @@ struct vl_mpeg12_decoder
 
    enum pipe_format zscan_source_format;
    enum pipe_format idct_source_format;
-   enum pipe_format idct_intermediate_format;
    enum pipe_format mc_source_format;
 
    struct pipe_vertex_buffer quads;
@@ -62,6 +61,8 @@ struct vl_mpeg12_decoder
    void *ves_ycbcr;
    void *ves_mv;
 
+   void *sampler_ycbcr;
+
    struct vl_zscan zscan_y, zscan_c;
    struct vl_idct idct_y, idct_c;
    struct vl_mc mc_y, mc_c;
@@ -77,7 +78,6 @@ struct vl_mpeg12_buffer
 
    struct pipe_video_buffer *zscan_source;
    struct pipe_video_buffer *idct_source;
-   struct pipe_video_buffer *idct_intermediate;
    struct pipe_video_buffer *mc_source;
 
    struct vl_zscan_buffer zscan[VL_MAX_PLANES];
-- 
cgit v1.2.3


From 7cde6722b01732e5a897771b56bf8d13719153ec Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 25 Apr 2011 10:33:48 +0200
Subject: xvmc: further optimize ycbcr upload

---
 src/gallium/state_trackers/xorg/xvmc/surface.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index fc4593dbc5f..a80515839ee 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -150,12 +150,15 @@ UploadYcbcrBlocks(XvMCSurfacePrivate *surface,
    enum pipe_mpeg12_dct_intra intra;
    enum pipe_mpeg12_dct_type coding;
 
-   unsigned tb, x, y;
+   unsigned tb, x, y, luma_blocks;
    short *blocks;
 
    assert(surface);
    assert(xvmc_mb);
 
+   if (!xvmc_mb->coded_block_pattern)
+      return;
+
    intra = xvmc_mb->macroblock_type & XVMC_MB_TYPE_INTRA ?
            PIPE_MPEG12_DCT_INTRA : PIPE_MPEG12_DCT_DELTA;
 
@@ -164,7 +167,7 @@ UploadYcbcrBlocks(XvMCSurfacePrivate *surface,
 
    blocks = xvmc_blocks->blocks + xvmc_mb->index * BLOCK_SIZE_SAMPLES;
 
-   for (y = 0; y < 2; ++y) {
+   for (y = 0, luma_blocks = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
          if (xvmc_mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) {
 
@@ -174,16 +177,20 @@ UploadYcbcrBlocks(XvMCSurfacePrivate *surface,
             stream->intra = intra;
             stream->coding = coding;
 
-            memcpy(surface->ycbcr[0].buffer, blocks, BLOCK_SIZE_BYTES);
-
             surface->ycbcr[0].num_blocks_added++;
             surface->ycbcr[0].stream++;
-            surface->ycbcr[0].buffer += BLOCK_SIZE_SAMPLES;
-            blocks += BLOCK_SIZE_SAMPLES;
+
+            luma_blocks++;
          }
       }
    }
 
+   if (luma_blocks > 0) {
+      memcpy(surface->ycbcr[0].buffer, blocks, BLOCK_SIZE_BYTES * luma_blocks);
+      surface->ycbcr[0].buffer += BLOCK_SIZE_SAMPLES * luma_blocks;
+      blocks += BLOCK_SIZE_SAMPLES * luma_blocks;
+   }
+
    /* TODO: Implement 422, 444 */
    //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
@@ -224,7 +231,8 @@ MacroBlocksToPipe(XvMCSurfacePrivate *surface,
       unsigned mv_pos = xvmc_mb->x + surface->mv_stride * xvmc_mb->y;
       unsigned mv_weights[2];
 
-      UploadYcbcrBlocks(surface, xvmc_mb, xvmc_blocks);
+      if (xvmc_mb->macroblock_type & (XVMC_MB_TYPE_PATTERN | XVMC_MB_TYPE_INTRA))
+         UploadYcbcrBlocks(surface, xvmc_mb, xvmc_blocks);
 
       MacroBlockTypeToPipeWeights(xvmc_mb, mv_weights);
 
-- 
cgit v1.2.3


From 5aa26412432dbdb3b1677d6d2f74bba010f443ae Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 25 Apr 2011 11:39:41 +0200
Subject: vdpau: implement and cleanup PresentationQueueTarget destruction

---
 src/gallium/state_trackers/vdpau/device.c        | 29 ++++++++++++++++++++----
 src/gallium/state_trackers/vdpau/htab.c          | 10 ++++++++
 src/gallium/state_trackers/vdpau/presentation.c  |  8 ++-----
 src/gallium/state_trackers/vdpau/vdpau_private.h |  2 ++
 4 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 7906757ec1b..b3de0f29305 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -27,9 +27,12 @@
 
 #include <pipe/p_compiler.h>
 #include <pipe/p_video_context.h>
-#include <vl_winsys.h>
+
 #include <util/u_memory.h>
 #include <util/u_debug.h>
+
+#include <vl_winsys.h>
+
 #include "vdpau_private.h"
 
 PUBLIC VdpStatus
@@ -94,8 +97,8 @@ PUBLIC VdpStatus
 vlVdpPresentationQueueTargetCreateX11(VdpDevice device, Drawable drawable,
                                       VdpPresentationQueueTarget *target)
 {
-   VdpStatus    ret;
-   vlVdpPresentationQueueTarget *pqt = NULL;
+   vlVdpPresentationQueueTarget *pqt;
+   VdpStatus ret;
 
    debug_printf("[VDPAU] Creating PresentationQueueTarget\n");
 
@@ -122,10 +125,27 @@ vlVdpPresentationQueueTargetCreateX11(VdpDevice device, Drawable drawable,
    return VDP_STATUS_OK;
 
 no_handle:
-   FREE(dev);
+   FREE(pqt);
    return ret;
 }
 
+VdpStatus
+vlVdpPresentationQueueTargetDestroy(VdpPresentationQueueTarget presentation_queue_target)
+{
+   vlVdpPresentationQueueTarget *pqt;
+
+   debug_printf("[VDPAU] Destroying PresentationQueueTarget\n");
+
+   pqt = vlGetDataHTAB(presentation_queue_target);
+   if (!pqt)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   vlRemoveDataHTAB(presentation_queue_target);
+   FREE(pqt);
+
+   return VDP_STATUS_OK;
+}
+
 VdpStatus
 vlVdpDeviceDestroy(VdpDevice device)
 {
@@ -134,6 +154,7 @@ vlVdpDeviceDestroy(VdpDevice device)
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
+
    FREE(dev);
    vlDestroyHTAB();
 
diff --git a/src/gallium/state_trackers/vdpau/htab.c b/src/gallium/state_trackers/vdpau/htab.c
index 0c958055374..20f5a171f19 100644
--- a/src/gallium/state_trackers/vdpau/htab.c
+++ b/src/gallium/state_trackers/vdpau/htab.c
@@ -92,3 +92,13 @@ void* vlGetDataHTAB(vlHandle handle)
    return (void*)handle;
 #endif
 }
+
+void vlRemoveDataHTAB(vlHandle handle)
+{
+#ifdef VL_HANDLES
+   pipe_mutex_lock(htab_lock);
+   if (htab)
+      handle_table_remove(htab, handle);
+   pipe_mutex_unlock(htab_lock);
+#endif
+}
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 063c63fb4ee..2837e7a306f 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -27,16 +27,12 @@
 
 #include <stdio.h>
 
-#include "vdpau_private.h"
 #include <vdpau/vdpau.h>
+
 #include <util/u_debug.h>
 #include <util/u_memory.h>
 
-VdpStatus
-vlVdpPresentationQueueTargetDestroy(VdpPresentationQueueTarget presentation_queue_target)
-{
-   return VDP_STATUS_NO_IMPLEMENTATION;
-}
+#include "vdpau_private.h"
 
 VdpStatus
 vlVdpPresentationQueueCreate(VdpDevice device,
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index bd77507567f..ac1f9ccef99 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -222,6 +222,8 @@ boolean vlCreateHTAB(void);
 void vlDestroyHTAB(void);
 vlHandle vlAddDataHTAB(void *data);
 void* vlGetDataHTAB(vlHandle handle);
+void vlRemoveDataHTAB(vlHandle handle);
+
 boolean vlGetFuncFTAB(VdpFuncId function_id, void **func);
 
 /* Public functions */
-- 
cgit v1.2.3


From 596f09aa7bafd769912b1c0efe97434dff4c3f0b Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 5 Mar 2011 15:54:27 +0100
Subject: r300g: implement draw_instanced for HWTCL

---
 src/gallium/drivers/r300/r300_context.c       | 26 ++++++++++++++
 src/gallium/drivers/r300/r300_context.h       |  3 ++
 src/gallium/drivers/r300/r300_emit.c          | 43 ++++++++++++++++++-----
 src/gallium/drivers/r300/r300_render.c        | 12 +++++++
 src/gallium/drivers/r300/r300_state.c         | 33 +++++++++++++++---
 src/gallium/drivers/r300/r300_state_derived.c |  4 ++-
 src/gallium/drivers/r300/r300_tgsi_to_rc.c    | 49 +++++++++++++++++++++++++--
 src/gallium/drivers/r300/r300_tgsi_to_rc.h    |  3 ++
 src/gallium/drivers/r300/r300_vs.c            |  2 +-
 9 files changed, 158 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 15d1278c3bb..934871f6553 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -80,6 +80,7 @@ static void r300_release_referenced_objects(struct r300_context *r300)
     /* Manually-created vertex buffers. */
     pipe_resource_reference(&r300->dummy_vb, NULL);
     pipe_resource_reference(&r300->vbo, NULL);
+    pipe_resource_reference((struct pipe_resource**)&r300->vb_instanceid, NULL);
 
     /* If there are any queries pending or not destroyed, remove them now. */
     foreach_s(query, temp, &r300->query_list) {
@@ -493,6 +494,31 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
         r300->dummy_vb = screen->resource_create(screen, &vb);
     }
 
+    {
+        int i, num = 128000;
+        struct pipe_resource vb, *r;
+        struct pipe_transfer *transfer;
+        float *buf;
+
+        memset(&vb, 0, sizeof(vb));
+        vb.target = PIPE_BUFFER;
+        vb.format = PIPE_FORMAT_R8_UNORM;
+        vb.bind = PIPE_BIND_VERTEX_BUFFER;
+        vb.usage = PIPE_USAGE_IMMUTABLE;
+        vb.width0 = 4 * num;
+        vb.height0 = 1;
+        vb.depth0 = 1;
+
+        r = screen->resource_create(screen, &vb);
+
+        buf = pipe_buffer_map(&r300->context, r, PIPE_TRANSFER_WRITE, &transfer);
+        for (i = 0; i < num; i++)
+            buf[i] = i;
+        pipe_buffer_unmap(&r300->context, transfer);
+
+        r300->vb_instanceid = r300_resource(r);
+    }
+
     {
         struct pipe_depth_stencil_alpha_state dsa;
         memset(&dsa, 0, sizeof(dsa));
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 8a0a54cf1e9..8f42431f8f7 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -431,6 +431,7 @@ struct r300_vertex_element_state {
     unsigned vertex_size_dwords;
 
     struct r300_vertex_stream_state vertex_stream;
+    struct r300_vertex_stream_state vertex_stream_instanced;
 };
 
 enum r300_hiz_func {
@@ -490,6 +491,8 @@ struct r300_context {
     /* When no vertex buffer is set, this one is used instead to prevent
      * hardlocks. */
     struct pipe_resource *dummy_vb;
+    /* Vertex buffer for InstanceID. */
+    struct r300_resource *vb_instanceid;
 
     /* The currently active query. */
     struct r300_query *query_current;
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 62435c5e2e2..173fd5dd80f 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -816,15 +816,17 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
     struct r300_resource *buf;
     int i;
     unsigned vertex_array_count = r300->velems->count;
-    unsigned packet_size = (vertex_array_count * 3 + 1) / 2;
+    unsigned real_vertex_array_count = vertex_array_count +
+                (vertex_array_count == 16 || instance_id == -1 ? 0 : 1);
+    unsigned packet_size = (real_vertex_array_count * 3 + 1) / 2;
     struct pipe_vertex_buffer *vb1, *vb2;
     unsigned *hw_format_size = r300->velems->format_size;
     unsigned size1, size2, offset1, offset2, stride1, stride2;
     CS_LOCALS(r300);
 
-    BEGIN_CS(2 + packet_size + vertex_array_count * 2);
+    BEGIN_CS(2 + packet_size + real_vertex_array_count * 2);
     OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
-    OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
+    OUT_CS(real_vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
 
     if (instance_id == -1) {
         /* Non-instanced arrays. This ignores instance_divisor and instance_id. */
@@ -896,14 +898,28 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
                 offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
             }
 
-            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));
-            OUT_CS(offset1);
+            /* Insert vertex buffer containing InstanceID. */
+            if (vertex_array_count < 16) {
+                OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) |
+                       R300_VBPNTR_SIZE1(4));
+                OUT_CS(offset1);
+                OUT_CS(4 * instance_id);
+            } else {
+                OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));
+                OUT_CS(offset1);
+            }
+        } else if (vertex_array_count < 16) {
+            /* Insert vertex buffer containing InstanceID. */
+            OUT_CS(R300_VBPNTR_SIZE0(4));
+            OUT_CS(4 * instance_id);
         }
 
         for (i = 0; i < vertex_array_count; i++) {
             buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
             OUT_CS_RELOC(buf);
         }
+        if (vertex_array_count < 16)
+            OUT_CS_RELOC(r300->vb_instanceid);
     }
     END_CS;
 }
@@ -936,11 +952,18 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
 void r300_emit_vertex_stream_state(struct r300_context* r300,
                                    unsigned size, void* state)
 {
-    struct r300_vertex_stream_state *streams =
-        (struct r300_vertex_stream_state*)state;
+    struct r300_vertex_element_state *velems =
+        (struct r300_vertex_element_state*)state;
+    struct r300_vertex_stream_state *streams;
     unsigned i;
     CS_LOCALS(r300);
 
+    if (r300->screen->caps.has_tcl && r300->instancing_enabled) {
+        streams = &velems->vertex_stream_instanced;
+    } else {
+        streams = &velems->vertex_stream;
+    }
+
     if (DBG_ON(r300, DBG_PSC)) {
         fprintf(stderr, "r300: PSC emit:\n");
 
@@ -955,7 +978,7 @@ void r300_emit_vertex_stream_state(struct r300_context* r300,
         }
     }
 
-    BEGIN_CS(size);
+    BEGIN_CS((1 + streams->count) * 2);
     OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count);
     OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count);
     OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count);
@@ -1219,6 +1242,10 @@ validate:
             r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf,
                                     r300_resource(*buf)->domain, 0);
         }
+        if (r300->instancing_enabled) {
+            r300->rws->cs_add_reloc(r300->cs, r300->vb_instanceid->cs_buf,
+                                    r300->vb_instanceid->domain, 0);
+        }
     }
     /* ...and index buffer for HWTCL path. */
     if (index_buffer)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 429b85545f7..3674edc975f 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -305,6 +305,18 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300,
                                           int index_bias,
                                           int instance_id)
 {
+    /* Update vertex elements for InstanceID here. */
+    boolean instancing_enabled = instance_id != -1;
+
+    if (r300->screen->caps.has_tcl &&
+        (flags & PREP_EMIT_AOS) &&
+        instancing_enabled != r300->instancing_enabled) {
+        r300->instancing_enabled = instancing_enabled;
+        r300_mark_atom_dirty(r300, &r300->vertex_stream_state);
+        r300->vertex_arrays_dirty = TRUE;
+        flags |= PREP_EMIT_STATES;
+    }
+
     /* Make sure there is enough space in the command stream and emit states. */
     if (r300_reserve_cs_dwords(r300, flags, cs_dwords))
         flags |= PREP_EMIT_STATES;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 24b41d5085d..da444f7c326 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1605,9 +1605,10 @@ static void r300_set_index_buffer(struct pipe_context* pipe,
 }
 
 /* Initialize the PSC tables. */
-static void r300_vertex_psc(struct r300_vertex_element_state *velems)
+static void r300_vertex_psc(struct r300_vertex_element_state *velems,
+                            struct r300_vertex_stream_state *vstream,
+                            boolean insert_instance_id_attrib)
 {
-    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
     uint16_t type, swizzle;
     enum pipe_format format;
     unsigned i;
@@ -1638,6 +1639,27 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems)
         }
     }
 
+    /* Insert attrib emulating InstanceID. */
+    if (i < 15 && insert_instance_id_attrib) {
+        format = PIPE_FORMAT_R32_FLOAT;
+
+        type = r300_translate_vertex_data_type(format);
+        assert(type != R300_INVALID_FORMAT);
+
+        type |= i << R300_DST_VEC_LOC_SHIFT;
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        if (i & 1) {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+        } else {
+            vstream->vap_prog_stream_cntl[i >> 1] |= type;
+            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+        }
+
+        i++;
+    }
+
     /* Set the last vector in the PSC. */
     if (i) {
         i -= 1;
@@ -1680,7 +1702,8 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
     if (r300_screen(pipe->screen)->caps.has_tcl) {
         /* Setup PSC.
          * The unused components will be replaced by (..., 0, 1). */
-        r300_vertex_psc(velems);
+        r300_vertex_psc(velems, &velems->vertex_stream, FALSE);
+        r300_vertex_psc(velems, &velems->vertex_stream_instanced, TRUE);
 
         for (i = 0; i < count; i++) {
             velems->format_size[i] =
@@ -1711,8 +1734,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
         return;
     }
 
-    UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
-    r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
+    UPDATE_STATE(velems, r300->vertex_stream_state);
+    r300->vertex_stream_state.size = (1 + velems->vertex_stream_instanced.count) * 2;
     r300->vertex_arrays_dirty = TRUE;
 }
 
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index afc1451183d..a1dfd7d0c80 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -133,7 +133,9 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
 /* Update the PSC tables for SW TCL, using Draw. */
 static void r300_swtcl_vertex_psc(struct r300_context *r300)
 {
-    struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state;
+    struct r300_vertex_element_state *velems =
+            (struct r300_vertex_element_state*)r300->vertex_stream_state.state;
+    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
     struct vertex_info *vinfo = &r300->vertex_info;
     uint16_t type, swizzle;
     enum pipe_format format;
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 6a000cfe2c6..2ac52906d13 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -25,6 +25,7 @@
 #include "radeon_compiler.h"
 #include "radeon_program.h"
 
+#include "util/u_math.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
@@ -168,6 +169,7 @@ static unsigned translate_register_file(unsigned file)
             /* fall-through */
         case TGSI_FILE_TEMPORARY: return RC_FILE_TEMPORARY;
         case TGSI_FILE_ADDRESS: return RC_FILE_ADDRESS;
+        case TGSI_FILE_SYSTEM_VALUE: return RC_FILE_INPUT;
     }
 }
 
@@ -179,6 +181,17 @@ static int translate_register_index(
     if (file == TGSI_FILE_IMMEDIATE)
         return ttr->immediate_offset + index;
 
+    if (file == TGSI_FILE_SYSTEM_VALUE) {
+        if (index == ttr->instance_id) {
+            return ttr->num_inputs;
+        } else {
+            fprintf(stderr, "Unknown system value semantic index: %i\n",
+                    index);
+            ttr->error = TRUE;
+            return 0;
+        }
+    }
+
     return index;
 }
 
@@ -268,7 +281,8 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi
     }
 }
 
-static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src)
+static void transform_instruction(struct tgsi_to_rc * ttr,
+                                  struct tgsi_full_instruction * src)
 {
     struct rc_instruction * dst;
     int i;
@@ -328,6 +342,27 @@ static void handle_immediate(struct tgsi_to_rc * ttr,
     }
 }
 
+static void handle_declaration(struct tgsi_to_rc *ttr,
+                               struct tgsi_full_declaration *decl)
+{
+    switch (decl->Declaration.File) {
+    case TGSI_FILE_INPUT:
+        ttr->num_inputs = MAX2(ttr->num_inputs, decl->Range.First + 1);
+        break;
+
+    case TGSI_FILE_SYSTEM_VALUE:
+        if (decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
+            printf("Got instance id\n");
+            ttr->instance_id = decl->Range.First;
+        } else {
+            fprintf(stderr, "Unknown system value semantic: %i.\n",
+                    decl->Semantic.Name);
+            ttr->error = TRUE;
+        }
+        break;
+    }
+}
+
 void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
                      const struct tgsi_token * tokens)
 {
@@ -336,6 +371,8 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
     unsigned imm_index = 0;
     int i;
 
+    ttr->num_inputs = 0;
+    ttr->instance_id = -1;
     ttr->error = FALSE;
 
     /* Allocate constants placeholders.
@@ -362,21 +399,29 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
 
         switch (parser.FullToken.Token.Type) {
             case TGSI_TOKEN_TYPE_DECLARATION:
+                handle_declaration(ttr, &parser.FullToken.FullDeclaration);
+                if (ttr->error)
+                    goto end_while;
                 break;
+
             case TGSI_TOKEN_TYPE_IMMEDIATE:
                 handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index);
                 imm_index++;
                 break;
+
             case TGSI_TOKEN_TYPE_INSTRUCTION:
                 inst = &parser.FullToken.FullInstruction;
                 if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
-                    break;
+                    goto end_while;
                 }
 
                 transform_instruction(ttr, inst);
+                if (ttr->error)
+                    goto end_while;
                 break;
         }
     }
+end_while:
 
     tgsi_parse_free(&parser);
 
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
index adb044cfe56..c9bd6277266 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
@@ -44,6 +44,9 @@ struct tgsi_to_rc {
     struct swizzled_imms * imms_to_swizzle;
     unsigned imms_to_swizzle_count;
 
+    int num_inputs;
+    int instance_id;
+
     /* Vertex shaders have no half swizzles, and no way to handle them, so
      * until rc grows proper support, indicate if they're safe to use. */
     boolean use_half_swizzles;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index b319890157f..90eba5a8f45 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -103,7 +103,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
                               outputs->bcolor[1] != ATTR_UNUSED;
 
     /* Fill in the input mapping */
-    for (i = 0; i < info->num_inputs; i++)
+    for (i = 0; i < info->num_inputs+1; i++)
         c->code->inputs[i] = i;
 
     /* Position. */
-- 
cgit v1.2.3


From 38bd8131776879e9dc90d06848657756a4a13a66 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 26 Apr 2011 01:49:07 +0200
Subject: vdpau: add stubs for the missing functions

---
 src/gallium/state_trackers/vdpau/decode.c        |  9 +++
 src/gallium/state_trackers/vdpau/device.c        |  2 +
 src/gallium/state_trackers/vdpau/ftab.c          | 46 +++++++-------
 src/gallium/state_trackers/vdpau/mixer.c         | 42 +++++++++++++
 src/gallium/state_trackers/vdpau/output.c        | 79 ++++++++++++++++++++++++
 src/gallium/state_trackers/vdpau/query.c         | 15 +++++
 src/gallium/state_trackers/vdpau/surface.c       | 13 ++--
 src/gallium/state_trackers/vdpau/vdpau_private.h | 15 +++++
 8 files changed, 193 insertions(+), 28 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 7905227597a..12cc6c7b42f 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -238,6 +238,15 @@ vlVdpDecoderRenderMpeg2(vlVdpDecoder *vldecoder,
 }
 #endif
 
+VdpStatus
+vlVdpDecoderGetParameters(VdpDecoder decoder,
+                          VdpDecoderProfile *profile,
+                          uint32_t *width,
+                          uint32_t *height)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
 VdpStatus
 vlVdpDecoderRender(VdpDecoder decoder,
                    VdpVideoSurface target,
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index b3de0f29305..c0bf56edc2e 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -176,6 +176,8 @@ vlVdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_poi
    if (!vlGetFuncFTAB(function_id, function_pointer))
       return VDP_STATUS_INVALID_FUNC_ID;
 
+   debug_printf("[VDPAU] Got proc adress %p for id %d\n", *function_pointer, function_id);
+
    return VDP_STATUS_OK;
 }
 
diff --git a/src/gallium/state_trackers/vdpau/ftab.c b/src/gallium/state_trackers/vdpau/ftab.c
index de08b810268..66ed50c3299 100644
--- a/src/gallium/state_trackers/vdpau/ftab.c
+++ b/src/gallium/state_trackers/vdpau/ftab.c
@@ -33,7 +33,7 @@ static void* ftab[67] =
    &vlVdpGetErrorString, /* VDP_FUNC_ID_GET_ERROR_STRING */
    &vlVdpGetProcAddress, /* VDP_FUNC_ID_GET_PROC_ADDRESS */
    &vlVdpGetApiVersion, /* VDP_FUNC_ID_GET_API_VERSION */
-   0x55,					/* DUMMY */
+   NULL, /* DUMMY */
    &vlVdpGetInformationString, /* VDP_FUNC_ID_GET_INFORMATION_STRING */
    &vlVdpDeviceDestroy, /* VDP_FUNC_ID_DEVICE_DESTROY */
    &vlVdpGenerateCSCMatrix, /* VDP_FUNC_ID_GENERATE_CSC_MATRIX */
@@ -46,30 +46,30 @@ static void* ftab[67] =
    &vlVdpVideoSurfacePutBitsYCbCr, /* VDP_FUNC_ID_VIDEO_SURFACE_PUT_BITS_Y_CB_CR */
    &vlVdpOutputSurfaceQueryCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_CAPABILITIES */
    &vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_GET_PUT_BITS_NATIVE_CAPABILITIES */
-   0x2, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_PUT_BITS_INDEXED_CAPABILITIES */
+   &vlVdpOutputSurfaceQueryPutBitsIndexedCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_PUT_BITS_INDEXED_CAPABILITIES */
    &vlVdpOutputSurfaceQueryPutBitsYCbCrCapabilities, /* VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_PUT_BITS_Y_CB_CR_CAPABILITIES */
    &vlVdpOutputSurfaceCreate, /* VDP_FUNC_ID_OUTPUT_SURFACE_CREATE */
-   0x3, /* VDP_FUNC_ID_OUTPUT_SURFACE_DESTROY */
-   0x4, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_PARAMETERS */
-   0x5, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_BITS_NATIVE */
-   0x6, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_NATIVE */
-   0x7, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_INDEXED */
-   0x8, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_Y_CB_CR */
+   &vlVdpOutputSurfaceDestroy, /* VDP_FUNC_ID_OUTPUT_SURFACE_DESTROY */
+   &vlVdpOutputSurfaceGetParameters, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_PARAMETERS */
+   &vlVdpOutputSurfaceGetBitsNative, /* VDP_FUNC_ID_OUTPUT_SURFACE_GET_BITS_NATIVE */
+   &vlVdpOutputSurfacePutBitsNative, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_NATIVE */
+   &vlVdpOutputSurfacePutBitsIndexed, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_INDEXED */
+   &vlVdpOutputSurfacePutBitsYCbCr, /* VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_Y_CB_CR */
    &vlVdpBitmapSurfaceQueryCapabilities, /* VDP_FUNC_ID_BITMAP_SURFACE_QUERY_CAPABILITIES */
    &vlVdpBitmapSurfaceCreate, /* VDP_FUNC_ID_BITMAP_SURFACE_CREATE */
    &vlVdpBitmapSurfaceDestroy, /* VDP_FUNC_ID_BITMAP_SURFACE_DESTROY */
    &vlVdpBitmapSurfaceGetParameters, /* VDP_FUNC_ID_BITMAP_SURFACE_GET_PARAMETERS */
    &vlVdpBitmapSurfacePutBitsNative, /* VDP_FUNC_ID_BITMAP_SURFACE_PUT_BITS_NATIVE */
-   0x55,	/* DUMMY */
-   0x55,	/* DUMMY */
-   0x55,	/* DUMMY */
-   0x9, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_OUTPUT_SURFACE */
-   0x10, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_BITMAP_SURFACE */
-   0x11, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_VIDEO_SURFACE_LUMA */
+   NULL, /* DUMMY */
+   NULL, /* DUMMY */
+   NULL, /* DUMMY */
+   &vlVdpOutputSurfaceRenderOutputSurface, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_OUTPUT_SURFACE */
+   &vlVdpOutputSurfaceRenderBitmapSurface, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_BITMAP_SURFACE */
+   NULL, /* VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_VIDEO_SURFACE_LUMA */
    &vlVdpDecoderQueryCapabilities, /* VDP_FUNC_ID_DECODER_QUERY_CAPABILITIES */
    &vlVdpDecoderCreate, /* VDP_FUNC_ID_DECODER_CREATE */
    &vlVdpDecoderDestroy, /* VDP_FUNC_ID_DECODER_DESTROY */
-   0x12, /* VDP_FUNC_ID_DECODER_GET_PARAMETERS */
+   &vlVdpDecoderGetParameters, /* VDP_FUNC_ID_DECODER_GET_PARAMETERS */
    &vlVdpDecoderRender, /* VDP_FUNC_ID_DECODER_RENDER */
    &vlVdpVideoMixerQueryFeatureSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_FEATURE_SUPPORT */
    &vlVdpVideoMixerQueryParameterSupport, /* VDP_FUNC_ID_VIDEO_MIXER_QUERY_PARAMETER_SUPPORT */
@@ -79,19 +79,19 @@ static void* ftab[67] =
    &vlVdpVideoMixerCreate, /* VDP_FUNC_ID_VIDEO_MIXER_CREATE */
    &vlVdpVideoMixerSetFeatureEnables, /* VDP_FUNC_ID_VIDEO_MIXER_SET_FEATURE_ENABLES */
    &vlVdpVideoMixerSetAttributeValues, /* VDP_FUNC_ID_VIDEO_MIXER_SET_ATTRIBUTE_VALUES */
-   0x16, /* VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_SUPPORT */
-   0x17, /* VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_ENABLES */
-   0x18, /* VDP_FUNC_ID_VIDEO_MIXER_GET_PARAMETER_VALUES */
-   0x19, /* VDP_FUNC_ID_VIDEO_MIXER_GET_ATTRIBUTE_VALUES */
-   0x20, /* VDP_FUNC_ID_VIDEO_MIXER_DESTROY */
+   &vlVdpVideoMixerGetFeatureSupport, /* VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_SUPPORT */
+   &vlVdpVideoMixerGetFeatureEnables, /* VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_ENABLES */
+   &vlVdpVideoMixerGetParameterValues, /* VDP_FUNC_ID_VIDEO_MIXER_GET_PARAMETER_VALUES */
+   &vlVdpVideoMixerGetAttributeValues, /* VDP_FUNC_ID_VIDEO_MIXER_GET_ATTRIBUTE_VALUES */
+   &vlVdpVideoMixerDestroy, /* VDP_FUNC_ID_VIDEO_MIXER_DESTROY */
    &vlVdpVideoMixerRender, /* VDP_FUNC_ID_VIDEO_MIXER_RENDER */
    &vlVdpPresentationQueueTargetDestroy, /* VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_DESTROY */
    &vlVdpPresentationQueueCreate, /* VDP_FUNC_ID_PRESENTATION_QUEUE_CREATE */
    &vlVdpPresentationQueueDestroy, /* VDP_FUNC_ID_PRESENTATION_QUEUE_DESTROY */
    &vlVdpPresentationQueueSetBackgroundColor, /* VDP_FUNC_ID_PRESENTATION_QUEUE_SET_BACKGROUND_COLOR */
    &vlVdpPresentationQueueGetBackgroundColor, /* VDP_FUNC_ID_PRESENTATION_QUEUE_GET_BACKGROUND_COLOR */
-   0x55,	/* DUMMY */
-   0x55,	/* DUMMY */
+   NULL, /* DUMMY */
+   NULL, /* DUMMY */
    &vlVdpPresentationQueueGetTime, /* VDP_FUNC_ID_PRESENTATION_QUEUE_GET_TIME */
    &vlVdpPresentationQueueDisplay, /* VDP_FUNC_ID_PRESENTATION_QUEUE_DISPLAY */
    &vlVdpPresentationQueueBlockUntilSurfaceIdle, /* VDP_FUNC_ID_PRESENTATION_QUEUE_BLOCK_UNTIL_SURFACE_IDLE */
@@ -118,5 +118,5 @@ boolean vlGetFuncFTAB(VdpFuncId function_id, void **func)
         return FALSE;
       *func = ftab_winsys[function_id];
    }
-   return TRUE;
+   return *func != NULL;
 }
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index 83cbf8abdb1..bafd84f4c51 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -86,6 +86,12 @@ no_handle:
    return ret;
 }
 
+VdpStatus
+vlVdpVideoMixerDestroy(VdpVideoMixer mixer)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
 VdpStatus
 vlVdpVideoMixerSetFeatureEnables(VdpVideoMixer mixer,
                                  uint32_t feature_count,
@@ -168,3 +174,39 @@ vlVdpVideoMixerSetAttributeValues(VdpVideoMixer mixer,
 
    return VDP_STATUS_OK;
 }
+
+VdpStatus
+vlVdpVideoMixerGetFeatureSupport(VdpVideoMixer mixer,
+                                 uint32_t feature_count,
+                                 VdpVideoMixerFeature const *features,
+                                 VdpBool *feature_supports)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpVideoMixerGetFeatureEnables(VdpVideoMixer mixer,
+                                 uint32_t feature_count,
+                                 VdpVideoMixerFeature const *features,
+                                 VdpBool *feature_enables)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpVideoMixerGetParameterValues(VdpVideoMixer mixer,
+                                  uint32_t parameter_count,
+                                  VdpVideoMixerParameter const *parameters,
+                                  void *const *parameter_values)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpVideoMixerGetAttributeValues(VdpVideoMixer mixer,
+                                  uint32_t attribute_count,
+                                  VdpVideoMixerAttribute const *attributes,
+                                  void *const *attribute_values)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index e95f333b02a..8b85592b4e4 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -104,3 +104,82 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
 
    return VDP_STATUS_OK;
 }
+
+VdpStatus
+vlVdpOutputSurfaceDestroy(VdpOutputSurface surface)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpOutputSurfaceGetParameters(VdpOutputSurface surface,
+                                VdpRGBAFormat *rgba_format,
+                                uint32_t *width, uint32_t *height)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpOutputSurfaceGetBitsNative(VdpOutputSurface surface,
+                                VdpRect const *source_rect,
+                                void *const *destination_data,
+                                uint32_t const *destination_pitches)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpOutputSurfacePutBitsNative(VdpOutputSurface surface,
+                                void const *const *source_data,
+                                uint32_t const *source_pitches,
+                                VdpRect const *destination_rect)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpOutputSurfacePutBitsIndexed(VdpOutputSurface surface,
+                                 VdpIndexedFormat source_indexed_format,
+                                 void const *const *source_data,
+                                 uint32_t const *source_pitch,
+                                 VdpRect const *destination_rect,
+                                 VdpColorTableFormat color_table_format,
+                                 void const *color_table)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpOutputSurfacePutBitsYCbCr(VdpOutputSurface surface,
+                               VdpYCbCrFormat source_ycbcr_format,
+                               void const *const *source_data,
+                               uint32_t const *source_pitches,
+                               VdpRect const *destination_rect,
+                               VdpCSCMatrix const *csc_matrix)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpOutputSurfaceRenderOutputSurface(VdpOutputSurface destination_surface,
+                                      VdpRect const *destination_rect,
+                                      VdpOutputSurface source_surface,
+                                      VdpRect const *source_rect,
+                                      VdpColor const *colors,
+                                      VdpOutputSurfaceRenderBlendState const *blend_state,
+                                      uint32_t flags)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
+VdpStatus
+vlVdpOutputSurfaceRenderBitmapSurface(VdpOutputSurface destination_surface,
+                                      VdpRect const *destination_rect,
+                                      VdpBitmapSurface source_surface,
+                                      VdpRect const *source_rect,
+                                      VdpColor const *colors,
+                                      VdpOutputSurfaceRenderBlendState const *blend_state,
+                                      uint32_t flags)
+{
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
index e971b6dc02e..97522057a10 100644
--- a/src/gallium/state_trackers/vdpau/query.c
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -206,6 +206,21 @@ vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities(VdpDevice device, VdpRGBAFor
    return VDP_STATUS_NO_IMPLEMENTATION;
 }
 
+VdpStatus
+vlVdpOutputSurfaceQueryPutBitsIndexedCapabilities(VdpDevice device,
+                                                  VdpRGBAFormat surface_rgba_format,
+                                                  VdpIndexedFormat bits_indexed_format,
+                                                  VdpColorTableFormat color_table_format,
+                                                  VdpBool *is_supported)
+{
+   debug_printf("[VDPAU] Querying output surfaces get put indexed cap\n");
+
+   if (!is_supported)
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_NO_IMPLEMENTATION;
+}
+
 VdpStatus
 vlVdpOutputSurfaceQueryPutBitsYCbCrCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
                                                 VdpYCbCrFormat bits_ycbcr_format,
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index c30cd07f434..dcbc6e61916 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -56,7 +56,7 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
       goto no_htab;
    }
 
-   p_surf = CALLOC(1, sizeof(p_surf));
+   p_surf = CALLOC(1, sizeof(vlVdpSurface));
    if (!p_surf) {
       ret = VDP_STATUS_RESOURCES;
       goto no_res;
@@ -69,10 +69,13 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
    }
 
    p_surf->device = dev;
-   p_surf->video_buffer = dev->context->vpipe->create_buffer(dev->context->vpipe,
-                                                             PIPE_FORMAT_YV12, // most common used
-                                                             ChromaToPipe(chroma_type),
-                                                             width, height);
+   p_surf->video_buffer = dev->context->vpipe->create_buffer
+   (
+      dev->context->vpipe,
+      PIPE_FORMAT_YV12, // most common used
+      ChromaToPipe(chroma_type),
+      width, height
+   );
 
    *surface = vlAddDataHTAB(p_surf);
    if (*surface == 0) {
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index ac1f9ccef99..40d26511cf7 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -241,6 +241,7 @@ VdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities vlVdpVideoSurfaceQueryGetPutBits
 VdpDecoderQueryCapabilities vlVdpDecoderQueryCapabilities;
 VdpOutputSurfaceQueryCapabilities vlVdpOutputSurfaceQueryCapabilities;
 VdpOutputSurfaceQueryGetPutBitsNativeCapabilities vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities;
+VdpOutputSurfaceQueryPutBitsIndexedCapabilities vlVdpOutputSurfaceQueryPutBitsIndexedCapabilities;
 VdpOutputSurfaceQueryPutBitsYCbCrCapabilities vlVdpOutputSurfaceQueryPutBitsYCbCrCapabilities;
 VdpBitmapSurfaceQueryCapabilities vlVdpBitmapSurfaceQueryCapabilities;
 VdpVideoMixerQueryFeatureSupport vlVdpVideoMixerQueryFeatureSupport;
@@ -255,8 +256,17 @@ VdpVideoSurfaceGetBitsYCbCr vlVdpVideoSurfaceGetBitsYCbCr;
 VdpVideoSurfacePutBitsYCbCr vlVdpVideoSurfacePutBitsYCbCr;
 VdpDecoderCreate vlVdpDecoderCreate;
 VdpDecoderDestroy vlVdpDecoderDestroy;
+VdpDecoderGetParameters vlVdpDecoderGetParameters;
 VdpDecoderRender vlVdpDecoderRender;
 VdpOutputSurfaceCreate vlVdpOutputSurfaceCreate;
+VdpOutputSurfaceDestroy vlVdpOutputSurfaceDestroy;
+VdpOutputSurfaceGetParameters vlVdpOutputSurfaceGetParameters;
+VdpOutputSurfaceGetBitsNative vlVdpOutputSurfaceGetBitsNative;
+VdpOutputSurfacePutBitsNative vlVdpOutputSurfacePutBitsNative;
+VdpOutputSurfacePutBitsIndexed vlVdpOutputSurfacePutBitsIndexed;
+VdpOutputSurfacePutBitsYCbCr vlVdpOutputSurfacePutBitsYCbCr;
+VdpOutputSurfaceRenderOutputSurface vlVdpOutputSurfaceRenderOutputSurface;
+VdpOutputSurfaceRenderBitmapSurface vlVdpOutputSurfaceRenderBitmapSurface;
 VdpBitmapSurfaceCreate vlVdpBitmapSurfaceCreate;
 VdpBitmapSurfaceDestroy vlVdpBitmapSurfaceDestroy;
 VdpBitmapSurfaceGetParameters vlVdpBitmapSurfaceGetParameters;
@@ -276,6 +286,11 @@ VdpVideoMixerSetFeatureEnables vlVdpVideoMixerSetFeatureEnables;
 VdpVideoMixerCreate vlVdpVideoMixerCreate;
 VdpVideoMixerRender vlVdpVideoMixerRender;
 VdpVideoMixerSetAttributeValues vlVdpVideoMixerSetAttributeValues;
+VdpVideoMixerGetFeatureSupport vlVdpVideoMixerGetFeatureSupport;
+VdpVideoMixerGetFeatureEnables vlVdpVideoMixerGetFeatureEnables;
+VdpVideoMixerGetParameterValues vlVdpVideoMixerGetParameterValues;
+VdpVideoMixerGetAttributeValues vlVdpVideoMixerGetAttributeValues;
+VdpVideoMixerDestroy vlVdpVideoMixerDestroy;
 VdpGenerateCSCMatrix vlVdpGenerateCSCMatrix;
 
 #endif // VDPAU_PRIVATE_H
-- 
cgit v1.2.3


From 13a50bd47deff3e52470a513695c1bdb86908d73 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 26 Apr 2011 11:16:54 +0200
Subject: vdpau: implement VideoMixerDestroy

---
 src/gallium/state_trackers/vdpau/mixer.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index bafd84f4c51..b02d591eb05 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -89,7 +89,19 @@ no_handle:
 VdpStatus
 vlVdpVideoMixerDestroy(VdpVideoMixer mixer)
 {
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   vlVdpVideoMixer *vmixer;
+
+   debug_printf("[VDPAU] Destroying VideoMixer\n");
+
+   vmixer = vlGetDataHTAB(mixer);
+   if (!vmixer)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   vmixer->compositor->destroy(vmixer->compositor);
+
+   FREE(vmixer);
+
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
-- 
cgit v1.2.3


From 67d93ea940a9cac1645ce4d6bdc78c5aad812cb6 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 26 Apr 2011 11:34:23 +0200
Subject: vdpau: implement PresentationQueueDestroy

---
 src/gallium/state_trackers/vdpau/presentation.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 2837e7a306f..1e2c78616e9 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -66,8 +66,8 @@ vlVdpPresentationQueueCreate(VdpDevice device,
       return VDP_STATUS_RESOURCES;
 
    pq->device = dev;
-   pq->compositor = context->create_compositor(context);
    pq->drawable = pqt->drawable;
+   pq->compositor = context->create_compositor(context);
    if (!pq->compositor) {
       ret = VDP_STATUS_ERROR;
       goto no_compositor;
@@ -89,7 +89,20 @@ no_compositor:
 VdpStatus
 vlVdpPresentationQueueDestroy(VdpPresentationQueue presentation_queue)
 {
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   vlVdpPresentationQueue *pq;
+
+   _debug_printf("[VDPAU] Destroying PresentationQueue\n");
+
+   pq = vlGetDataHTAB(presentation_queue);
+   if (!pq)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   pq->compositor->destroy(pq->compositor);
+
+   vlRemoveDataHTAB(presentation_queue);
+   FREE(pq);
+
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
-- 
cgit v1.2.3


From 2516a4654463322fede7cd17085dc4dbe92d324e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 26 Apr 2011 11:49:14 +0200
Subject: vdpau: implement OutputSurfaceDestroy

---
 src/gallium/state_trackers/vdpau/output.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index 8b85592b4e4..35b75ab3aa7 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -108,7 +108,21 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
 VdpStatus
 vlVdpOutputSurfaceDestroy(VdpOutputSurface surface)
 {
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   vlVdpOutputSurface *vlsurface;
+
+   debug_printf("[VDPAU] Destroying output surface\n");
+
+   vlsurface = vlGetDataHTAB(surface);
+   if (!vlsurface)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   pipe_surface_reference(&vlsurface->surface, NULL);
+   pipe_sampler_view_reference(&vlsurface->sampler_view, NULL);
+
+   vlRemoveDataHTAB(surface);
+   FREE(vlsurface);
+
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
-- 
cgit v1.2.3


From e602ecf9ef2f66289bcb159fdbdce2c76e3c07c1 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 26 Apr 2011 19:09:10 +0200
Subject: r600g: revert commit 68cc6bc5d8b6986acc7f5780d705f4ae9be2a446

Revert commit "remove the unneeded bo from COLOR[0-9]_INFO".
Also implement a working alternative.
---
 src/gallium/winsys/r600/drm/r600_hw_context.c | 34 ++++++++++++++-------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 6be05bd90f8..935ac07802e 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -199,7 +199,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028040_CB_COLOR0_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A0_CB_COLOR0_INFO, 0, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A0_CB_COLOR0_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028060_CB_COLOR0_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028080_CB_COLOR0_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -210,7 +210,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028044_CB_COLOR1_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A4_CB_COLOR1_INFO, 0, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A4_CB_COLOR1_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028064_CB_COLOR1_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028084_CB_COLOR1_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -221,7 +221,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028048_CB_COLOR2_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A8_CB_COLOR2_INFO, 0, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280A8_CB_COLOR2_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028068_CB_COLOR2_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028088_CB_COLOR2_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -232,7 +232,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02804C_CB_COLOR3_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280AC_CB_COLOR3_INFO, 0, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280AC_CB_COLOR3_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02806C_CB_COLOR3_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02808C_CB_COLOR3_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -243,7 +243,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028050_CB_COLOR4_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B0_CB_COLOR4_INFO, 0, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B0_CB_COLOR4_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028070_CB_COLOR4_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028090_CB_COLOR4_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -254,7 +254,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028054_CB_COLOR5_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B4_CB_COLOR5_INFO, 0, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B4_CB_COLOR5_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028074_CB_COLOR5_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028094_CB_COLOR5_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -263,7 +263,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280D4_CB_COLOR5_TILE, REG_FLAG_NEED_BO, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028114_CB_COLOR5_MASK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028058_CB_COLOR6_BASE, REG_FLAG_NEED_BO, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B8_CB_COLOR6_INFO, 0, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280B8_CB_COLOR6_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028078_CB_COLOR6_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_028098_CB_COLOR6_VIEW, 0, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
@@ -274,7 +274,7 @@ static const struct r600_reg r600_context_reg_list[] = {
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02805C_CB_COLOR7_BASE, REG_FLAG_NEED_BO, 0, 0},
 	{0, 0, GROUP_FORCE_NEW_BLOCK, 0, 0, 0},
-	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280BC_CB_COLOR7_INFO, 0, 0, 0xFFFFFFFF},
+	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280BC_CB_COLOR7_INFO, REG_FLAG_NEED_BO, 0, 0xFFFFFFFF},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02807C_CB_COLOR7_SIZE, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_02809C_CB_COLOR7_VIEW, 0, 0, 0},
 	{PKT3_SET_CONTEXT_REG, R600_CONTEXT_REG_OFFSET, R_0280FC_CB_COLOR7_FRAG, REG_FLAG_NEED_BO, 0, 0},
@@ -902,7 +902,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
 		}
 		if (block->flags & REG_FLAG_DIRTY_ALWAYS)
 			dirty |= R600_BLOCK_STATUS_DIRTY;
-		if (block->pm4_bo_index[id]) {
+		if (block->pm4_bo_index[id] && state->regs[i].bo) {
 			/* find relocation */
 			reloc_id = block->pm4_bo_index[id];
 			r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, state->regs[i].bo);
@@ -1124,13 +1124,15 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *
 		if (block->pm4_bo_index[j]) {
 			/* find relocation */
 			id = block->pm4_bo_index[j];
-			r600_context_bo_reloc(ctx,
-					&block->pm4[block->reloc[id].bo_pm4_index],
-					block->reloc[id].bo);
-			r600_context_bo_flush(ctx,
-					block->reloc[id].flush_flags,
-					block->reloc[id].flush_mask,
-					block->reloc[id].bo);
+			if (block->reloc[id].bo) {
+				r600_context_bo_reloc(ctx,
+						&block->pm4[block->reloc[id].bo_pm4_index],
+						block->reloc[id].bo);
+				r600_context_bo_flush(ctx,
+						block->reloc[id].flush_flags,
+						block->reloc[id].flush_mask,
+						block->reloc[id].bo);
+			}
 		}
 	}
 	memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4);
-- 
cgit v1.2.3


From 76d881b8b086495081c0a3c8fea2278f1480f107 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 27 Apr 2011 10:41:19 +0200
Subject: [g3dvl] fix a couple of bugs around paletted subpictures

---
 src/gallium/auxiliary/vl/vl_compositor.c          | 46 +++++++++++++++--------
 src/gallium/auxiliary/vl/vl_compositor.h          |  4 +-
 src/gallium/state_trackers/xorg/xvmc/subpicture.c | 22 ++++++-----
 3 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index e487abf915e..df66a2de591 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -115,31 +115,41 @@ static void *
 create_frag_shader_palette(struct vl_compositor *c)
 {
    struct ureg_program *shader;
+   struct ureg_src csc[3];
    struct ureg_src tc;
    struct ureg_src sampler;
    struct ureg_src palette;
    struct ureg_dst texel;
    struct ureg_dst fragment;
+   unsigned i;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return false;
 
+   for (i = 0; i < 3; ++i)
+      csc[i] = ureg_DECL_constant(shader, i);
+
    tc = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_LINEAR);
    sampler = ureg_DECL_sampler(shader, 0);
    palette = ureg_DECL_sampler(shader, 1);
-   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
    texel = ureg_DECL_temporary(shader);
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
    /*
     * texel = tex(tc, sampler)
-    * fragment.xyz = tex(texel, palette)
+    * fragment.xyz = tex(texel, palette) * csc
     * fragment.a = texel.a
     */
    ureg_TEX(shader, texel, TGSI_TEXTURE_2D, tc, sampler);
-   ureg_TEX(shader, fragment, TGSI_TEXTURE_1D, ureg_src(texel), palette);
+   ureg_MUL(shader, ureg_writemask(texel, TGSI_WRITEMASK_X), ureg_src(texel), ureg_imm1f(shader, 15.0f / 16.0f));
    ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(texel));
 
+   ureg_TEX(shader, texel, TGSI_TEXTURE_1D, ureg_src(texel), palette);
+
+   for (i = 0; i < 3; ++i)
+      ureg_DP4(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), csc[i], ureg_src(texel));
+
    ureg_release_temporary(shader, texel);
    ureg_END(shader);
 
@@ -242,12 +252,12 @@ init_pipe_state(struct vl_compositor *c)
    sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
    sampler.compare_func = PIPE_FUNC_ALWAYS;
    sampler.normalized_coords = 1;
-   /*sampler.lod_bias = ;*/
-   /*sampler.min_lod = ;*/
-   /*sampler.max_lod = ;*/
-   /*sampler.border_color[i] = ;*/
-   /*sampler.max_anisotropy = ;*/
-   c->sampler = c->pipe->create_sampler_state(c->pipe, &sampler);
+
+   c->sampler_linear = c->pipe->create_sampler_state(c->pipe, &sampler);
+
+   sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
+   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
+   c->sampler_nearest = c->pipe->create_sampler_state(c->pipe, &sampler);
 
    memset(&blend, 0, sizeof blend);
    blend.independent_blend_enable = 0;
@@ -286,7 +296,8 @@ static void cleanup_pipe_state(struct vl_compositor *c)
 {
    assert(c);
 
-   c->pipe->delete_sampler_state(c->pipe, c->sampler);
+   c->pipe->delete_sampler_state(c->pipe, c->sampler_linear);
+   c->pipe->delete_sampler_state(c->pipe, c->sampler_nearest);
    c->pipe->delete_blend_state(c->pipe, c->blend);
    c->pipe->delete_rasterizer_state(c->pipe, c->rast);
 }
@@ -430,6 +441,7 @@ draw_layers(struct vl_compositor *c)
          unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
 
          c->pipe->bind_fs_state(c->pipe, c->layers[i].fs);
+         c->pipe->bind_fragment_sampler_states(c->pipe, num_sampler_views, c->layers[i].samplers);
          c->pipe->set_fragment_sampler_views(c->pipe, num_sampler_views, samplers);
          util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, vb_index * 4, 4);
          vb_index++;
@@ -507,8 +519,10 @@ vl_compositor_set_buffer_layer(struct pipe_video_compositor *compositor,
    c->layers[layer].fs = c->fs_video_buffer;
 
    sampler_views = buffer->get_sampler_view_components(buffer);
-   for (i = 0; i < 3; ++i)
+   for (i = 0; i < 3; ++i) {
+      c->layers[layer].samplers[i] = c->sampler_linear;
       pipe_sampler_view_reference(&c->layers[layer].sampler_views[i], sampler_views[i]);
+   }
 
    c->layers[layer].src_rect = src_rect ? *src_rect : default_rect(&c->layers[layer]);
    c->layers[layer].dst_rect = dst_rect ? *dst_rect : default_rect(&c->layers[layer]);
@@ -529,6 +543,9 @@ vl_compositor_set_palette_layer(struct pipe_video_compositor *compositor,
 
    c->used_layers |= 1 << layer;
    c->layers[layer].fs = c->fs_palette;
+   c->layers[layer].samplers[0] = c->sampler_linear;
+   c->layers[layer].samplers[1] = c->sampler_nearest;
+   c->layers[layer].samplers[2] = NULL;
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[0], indexes);
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[1], palette);
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[2], NULL);
@@ -550,6 +567,9 @@ vl_compositor_set_rgba_layer(struct pipe_video_compositor *compositor,
 
    c->used_layers |= 1 << layer;
    c->layers[layer].fs = c->fs_rgba;
+   c->layers[layer].samplers[0] = c->sampler_linear;
+   c->layers[layer].samplers[1] = NULL;
+   c->layers[layer].samplers[2] = NULL;
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[0], rgba);
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[1], NULL);
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[2], NULL);
@@ -566,7 +586,6 @@ vl_compositor_render(struct pipe_video_compositor *compositor,
 {
    struct vl_compositor *c = (struct vl_compositor *)compositor;
    struct pipe_scissor_state scissor;
-   void *samplers[3];
 
    assert(compositor);
    assert(dst_surface);
@@ -590,14 +609,11 @@ vl_compositor_render(struct pipe_video_compositor *compositor,
       scissor.maxy = dst_surface->height;
    }
 
-   samplers[0] = samplers[1] = samplers[2] = c->sampler;
-
    gen_vertex_data(c);
 
    c->pipe->set_scissor_state(c->pipe, &scissor);
    c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
    c->pipe->set_viewport_state(c->pipe, &c->viewport);
-   c->pipe->bind_fragment_sampler_states(c->pipe, 3, &samplers[0]);
    c->pipe->bind_vs_state(c->pipe, c->vs);
    c->pipe->set_vertex_buffers(c->pipe, 1, &c->vertex_buf);
    c->pipe->bind_vertex_elements_state(c->pipe, c->vertex_elems_state);
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index c7da533c988..0bce04fcbf8 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -39,6 +39,7 @@ struct pipe_context;
 struct vl_compositor_layer
 {
    void *fs;
+   void *samplers[3];
    struct pipe_sampler_view *sampler_views[3];
    struct pipe_video_rect src_rect;
    struct pipe_video_rect dst_rect;
@@ -54,7 +55,8 @@ struct vl_compositor
    struct pipe_vertex_buffer vertex_buf;
    struct pipe_resource *csc_matrix;
 
-   void *sampler;
+   void *sampler_linear;
+   void *sampler_nearest;
    void *blend;
    void *rast;
    void *vertex_elems_state;
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 68519c08885..821c87e0dab 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -92,14 +92,14 @@ static void XvIDToSwizzle(int xvimage_id, struct pipe_sampler_view *tmpl)
          tmpl->swizzle_a = PIPE_SWIZZLE_ONE;
          break;
 
-      case FOURCC_AI44:
+      case FOURCC_IA44:
          tmpl->swizzle_r = PIPE_SWIZZLE_ALPHA;
          tmpl->swizzle_g = PIPE_SWIZZLE_ZERO;
          tmpl->swizzle_b = PIPE_SWIZZLE_ZERO;
          tmpl->swizzle_a = PIPE_SWIZZLE_RED;
          break;
 
-      case FOURCC_IA44:
+      case FOURCC_AI44:
          tmpl->swizzle_r = PIPE_SWIZZLE_RED;
          tmpl->swizzle_g = PIPE_SWIZZLE_ZERO;
          tmpl->swizzle_b = PIPE_SWIZZLE_ZERO;
@@ -117,10 +117,10 @@ static int PipeToComponentOrder(enum pipe_format format, char *component_order)
          return 0;
 
       case PIPE_FORMAT_L4A4_UNORM:
-         component_order[0] = PIPE_SWIZZLE_RED;
-         component_order[1] = PIPE_SWIZZLE_GREEN;
-         component_order[2] = PIPE_SWIZZLE_BLUE;
-         component_order[3] = PIPE_SWIZZLE_ALPHA;
+         component_order[0] = 'Y';
+         component_order[1] = 'U';
+         component_order[2] = 'V';
+         component_order[3] = 'A';
          return 4;
 
       default:
@@ -271,7 +271,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
 
    if (subpicture->num_palette_entries > 0) {
       tex_templ.target = PIPE_TEXTURE_1D;
-      tex_templ.format = PIPE_FORMAT_B8G8R8A8_UNORM;
+      tex_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
       tex_templ.width0 = subpicture->num_palette_entries;
       tex_templ.height0 = 1;
       tex_templ.usage = PIPE_USAGE_STATIC;
@@ -280,6 +280,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
 
       memset(&sampler_templ, 0, sizeof(sampler_templ));
       u_sampler_view_default_template(&sampler_templ, tex, tex->format);
+      sampler_templ.swizzle_a = PIPE_SWIZZLE_ONE;
       subpicture_priv->palette = vpipe->create_sampler_view(vpipe, tex, &sampler_templ);
       pipe_resource_reference(&tex, NULL);
       if (!subpicture_priv->sampler) {
@@ -333,6 +334,7 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
    XvMCContextPrivate *context_priv;
    struct pipe_video_context *vpipe;
    struct pipe_box dst_box = {dstx, dsty, 0, width, height, 1};
+   unsigned src_stride;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Compositing subpicture %p.\n", subpicture);
 
@@ -354,9 +356,11 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
    context_priv = subpicture_priv->context->privData;
    vpipe = context_priv->vctx->vpipe;
 
-   /* TODO: Assert rects are within bounds? Or clip? */
+   /* clipping should be done by upload_sampler and regardles what the documentation
+   says image->pitches[0] doesn't seems to be in bytes, so don't use it */
+   src_stride = image->width * util_format_get_blocksize(subpicture_priv->sampler->texture->format);
    vpipe->upload_sampler(vpipe, subpicture_priv->sampler, &dst_box,
-                         image->data, image->pitches[0], srcx, srcy);
+                         image->data, src_stride, srcx, srcy);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p composited.\n", subpicture);
 
-- 
cgit v1.2.3


From f3f212acf0d2fc25d3b6bd70dd1f346d97a9b25d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 27 Apr 2011 12:05:46 +0200
Subject: xvmc: recreate drawable surface only when dst area changes

---
 src/gallium/state_trackers/xorg/xvmc/context.c     |  1 +
 src/gallium/state_trackers/xorg/xvmc/surface.c     | 23 +++++++++++++---------
 .../state_trackers/xorg/xvmc/xvmc_private.h        |  4 ++++
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index f77dc0906bb..a28c3f7a424 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -318,6 +318,7 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
    context_priv = context->privData;
    vctx = context_priv->vctx;
    vscreen = vctx->vscreen;
+   pipe_surface_reference(&context_priv->drawable_surface, NULL);
    context_priv->decoder->destroy(context_priv->decoder);
    context_priv->compositor->destroy(context_priv->compositor);
    vl_video_destroy(vctx);
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index a80515839ee..7819fe0777b 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -488,7 +488,6 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    XvMCContext *context;
    struct pipe_video_rect src_rect = {srcx, srcy, srcw, srch};
    struct pipe_video_rect dst_rect = {destx, desty, destw, desth};
-   struct pipe_surface *drawable_surface;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Displaying surface %p.\n", surface);
 
@@ -501,8 +500,15 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    context = surface_priv->context;
    context_priv = context->privData;
 
-   drawable_surface = vl_drawable_surface_get(context_priv->vctx, drawable);
-   if (!drawable_surface)
+   if (!context_priv->drawable_surface ||
+       context_priv->dst_rect.x != dst_rect.x || context_priv->dst_rect.y != dst_rect.y ||
+       context_priv->dst_rect.w != dst_rect.w || context_priv->dst_rect.h != dst_rect.h) {
+
+      context_priv->drawable_surface = vl_drawable_surface_get(context_priv->vctx, drawable);
+      context_priv->dst_rect = dst_rect;
+   }
+
+   if (!context_priv->drawable_surface)
       return BadDrawable;
 
    assert(flags == XVMC_TOP_FIELD || flags == XVMC_BOTTOM_FIELD || flags == XVMC_FRAME_PICTURE);
@@ -538,7 +544,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
          compositor->set_palette_layer(compositor, 1, subpicture_priv->sampler, subpicture_priv->palette,
                                        &subpicture_priv->src_rect, &subpicture_priv->dst_rect);
       else
-         compositor->set_rgba_layer(compositor, 1, subpicture_priv->sampler, &src_rect, &dst_rect);
+         compositor->set_rgba_layer(compositor, 1, subpicture_priv->sampler,
+                                    &subpicture_priv->src_rect, &subpicture_priv->dst_rect);
 
       surface_priv->subpicture = NULL;
       subpicture_priv->surface = NULL;
@@ -547,20 +554,18 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    // Workaround for r600g, there seems to be a bug in the fence refcounting code
    vpipe->screen->fence_reference(vpipe->screen, &surface_priv->fence, NULL);
 
-   compositor->render_picture(compositor, PictureToPipe(flags), drawable_surface, &dst_rect, &surface_priv->fence);
+   compositor->render_picture(compositor, PictureToPipe(flags), context_priv->drawable_surface, &dst_rect, &surface_priv->fence);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
 
    vpipe->screen->flush_frontbuffer
    (
       vpipe->screen,
-      drawable_surface->texture,
+      context_priv->drawable_surface->texture,
       0, 0,
-      vl_contextprivate_get(context_priv->vctx, drawable_surface)
+      vl_contextprivate_get(context_priv->vctx, context_priv->drawable_surface)
    );
 
-   pipe_surface_reference(&drawable_surface, NULL);
-
    if(dump_window == -1) {
       dump_window = debug_get_num_option("XVMC_DUMP", 0);
    }
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 262adac7ab6..8d26b196fdc 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -62,6 +62,10 @@ typedef struct
 
    unsigned short subpicture_max_width;
    unsigned short subpicture_max_height;
+
+   struct pipe_video_rect dst_rect;
+   struct pipe_surface *drawable_surface;
+
 } XvMCContextPrivate;
 
 typedef struct
-- 
cgit v1.2.3


From 559f6d6cf4a9469c2c6ccea482115f22080f185f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 27 Apr 2011 12:59:30 +0200
Subject: [g3dvl] fix setting width, height and chroma format in video buffer

---
 src/gallium/auxiliary/vl/vl_video_buffer.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index dccd7e93945..976d22830de 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -180,6 +180,9 @@ vl_video_buffer_init(struct pipe_video_context *context,
    buffer->base.get_sampler_view_planes = vl_video_buffer_sampler_view_planes;
    buffer->base.get_sampler_view_components = vl_video_buffer_sampler_view_components;
    buffer->base.get_surfaces = vl_video_buffer_surfaces;
+   buffer->base.chroma_format = chroma_format;
+   buffer->base.width = width;
+   buffer->base.height = height;
    buffer->pipe = pipe;
    buffer->num_planes = 1;
 
-- 
cgit v1.2.3


From 6092fbed46302e2bdf6c6f2e229f4e393652e228 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 27 Apr 2011 13:01:01 +0200
Subject: [g3dvl] move compositor src and dst normalisation into layer setting

---
 src/gallium/auxiliary/vl/vl_compositor.c | 98 +++++++++++++++++++-------------
 src/gallium/auxiliary/vl/vl_compositor.h |  7 ++-
 2 files changed, 64 insertions(+), 41 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index df66a2de591..2f2b32d7944 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -366,36 +366,56 @@ default_rect(struct vl_compositor_layer *layer)
    return rect;
 }
 
+static inline struct vertex2f
+calc_topleft(struct vertex2f inv_size, struct pipe_video_rect rect)
+{
+   struct vertex2f res = { rect.x * inv_size.x, rect.y * inv_size.y };
+   return res;
+}
+
+static inline struct vertex2f
+calc_bottomright(struct vertex2f inv_size, struct pipe_video_rect rect)
+{
+   struct vertex2f res = { (rect.x + rect.w) * inv_size.x, (rect.y + rect.h) * inv_size.y };
+   return res;
+}
+
+static inline void
+calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned height,
+                 struct pipe_video_rect src, struct pipe_video_rect dst)
+{
+   struct vertex2f inv_size =  { 1.0f / width, 1.0f / height };
+
+   layer->src.tl = calc_topleft(inv_size, src);
+   layer->src.br = calc_bottomright(inv_size, src);
+   layer->dst.tl = calc_topleft(inv_size, dst);
+   layer->dst.br = calc_bottomright(inv_size, dst);
+}
+
 static void
-gen_rect_verts(struct vertex4f *vb,
-               struct pipe_video_rect *src_rect,
-               struct vertex2f *src_inv_size,
-               struct pipe_video_rect *dst_rect,
-               struct vertex2f *dst_inv_size)
+gen_rect_verts(struct vertex4f *vb, struct vl_compositor_layer *layer)
 {
-   assert(vb);
-   assert(src_rect && src_inv_size);
-   assert(dst_rect && dst_inv_size);
-
-   vb[0].x = dst_rect->x * dst_inv_size->x;
-   vb[0].y = dst_rect->y * dst_inv_size->y;
-   vb[0].z = src_rect->x * src_inv_size->x;
-   vb[0].w = src_rect->y * src_inv_size->y;
-
-   vb[1].x = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
-   vb[1].y = dst_rect->y * dst_inv_size->y;
-   vb[1].z = (src_rect->x + src_rect->w) * src_inv_size->x;
-   vb[1].w = src_rect->y * src_inv_size->y;
-
-   vb[2].x = (dst_rect->x + dst_rect->w) * dst_inv_size->x;
-   vb[2].y = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
-   vb[2].z = (src_rect->x + src_rect->w) * src_inv_size->x;
-   vb[2].w = (src_rect->y + src_rect->h) * src_inv_size->y;
-
-   vb[3].x = dst_rect->x * dst_inv_size->x;
-   vb[3].y = (dst_rect->y + dst_rect->h) * dst_inv_size->y;
-   vb[3].z = src_rect->x * src_inv_size->x;
-   vb[3].w = (src_rect->y + src_rect->h) * src_inv_size->y;
+   assert(vb && layer);
+
+   vb[0].x = layer->dst.tl.x;
+   vb[0].y = layer->dst.tl.y;
+   vb[0].z = layer->src.tl.x;
+   vb[0].w = layer->src.tl.y;
+
+   vb[1].x = layer->dst.br.x;
+   vb[1].y = layer->dst.tl.y;
+   vb[1].z = layer->src.br.x;
+   vb[1].w = layer->src.tl.y;
+
+   vb[2].x = layer->dst.br.x;
+   vb[2].y = layer->dst.br.y;
+   vb[2].z = layer->src.br.x;
+   vb[2].w = layer->src.br.y;
+
+   vb[3].x = layer->dst.tl.x;
+   vb[3].y = layer->dst.br.y;
+   vb[3].z = layer->src.tl.x;
+   vb[3].w = layer->src.br.y;
 }
 
 static void
@@ -416,11 +436,7 @@ gen_vertex_data(struct vl_compositor *c)
 
    for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; i++) {
       if (c->used_layers & (1 << i)) {
-         struct pipe_sampler_view *sv = c->layers[i].sampler_views[0];
-         struct vertex2f src_inv_size = {1.0f / sv->texture->width0, 1.0f / sv->texture->height0};
-
-         gen_rect_verts(vb, &c->layers[i].src_rect, &src_inv_size, &c->layers[i].dst_rect, &src_inv_size);
-
+         gen_rect_verts(vb, &c->layers[i]);
          vb += 4;
       }
    }
@@ -524,8 +540,9 @@ vl_compositor_set_buffer_layer(struct pipe_video_compositor *compositor,
       pipe_sampler_view_reference(&c->layers[layer].sampler_views[i], sampler_views[i]);
    }
 
-   c->layers[layer].src_rect = src_rect ? *src_rect : default_rect(&c->layers[layer]);
-   c->layers[layer].dst_rect = dst_rect ? *dst_rect : default_rect(&c->layers[layer]);
+   calc_src_and_dst(&c->layers[layer], buffer->width, buffer->height,
+                    src_rect ? *src_rect : default_rect(&c->layers[layer]),
+                    dst_rect ? *dst_rect : default_rect(&c->layers[layer]));
 }
 
 static void
@@ -549,8 +566,10 @@ vl_compositor_set_palette_layer(struct pipe_video_compositor *compositor,
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[0], indexes);
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[1], palette);
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[2], NULL);
-   c->layers[layer].src_rect = src_rect ? *src_rect : default_rect(&c->layers[layer]);
-   c->layers[layer].dst_rect = dst_rect ? *dst_rect : default_rect(&c->layers[layer]);
+   calc_src_and_dst(&c->layers[layer], indexes->texture->width0, indexes->texture->height0,
+                    src_rect ? *src_rect : default_rect(&c->layers[layer]),
+                    dst_rect ? *dst_rect : default_rect(&c->layers[layer]));
+
 }
 
 static void
@@ -573,8 +592,9 @@ vl_compositor_set_rgba_layer(struct pipe_video_compositor *compositor,
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[0], rgba);
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[1], NULL);
    pipe_sampler_view_reference(&c->layers[layer].sampler_views[2], NULL);
-   c->layers[layer].src_rect = src_rect ? *src_rect : default_rect(&c->layers[layer]);
-   c->layers[layer].dst_rect = dst_rect ? *dst_rect : default_rect(&c->layers[layer]);
+   calc_src_and_dst(&c->layers[layer], rgba->texture->width0, rgba->texture->height0,
+                    src_rect ? *src_rect : default_rect(&c->layers[layer]),
+                    dst_rect ? *dst_rect : default_rect(&c->layers[layer]));
 }
 
 static void
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 0bce04fcbf8..339ea415e8a 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -32,6 +32,8 @@
 #include <pipe/p_video_context.h>
 #include <pipe/p_video_state.h>
 
+#include "vl_types.h"
+
 struct pipe_context;
 
 #define VL_COMPOSITOR_MAX_LAYERS 16
@@ -41,8 +43,9 @@ struct vl_compositor_layer
    void *fs;
    void *samplers[3];
    struct pipe_sampler_view *sampler_views[3];
-   struct pipe_video_rect src_rect;
-   struct pipe_video_rect dst_rect;
+   struct {
+      struct vertex2f tl, br;
+   } src, dst;
 };
 
 struct vl_compositor
-- 
cgit v1.2.3


From 0d53cb2e83cafb7007068192674a8b5b57a27ca4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 27 Apr 2011 15:17:24 +0200
Subject: [g3dvl] implement clearing of dirty destination surface areas

---
 src/gallium/auxiliary/vl/vl_compositor.c | 83 +++++++++++++++++++++++++++-----
 src/gallium/auxiliary/vl/vl_compositor.h |  5 ++
 2 files changed, 75 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 2f2b32d7944..f6f7b65fd6b 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -31,6 +31,7 @@
 
 #include <util/u_memory.h>
 #include <util/u_draw.h>
+#include <util/u_surface.h>
 
 #include <tgsi/tgsi_ureg.h>
 
@@ -302,6 +303,22 @@ static void cleanup_pipe_state(struct vl_compositor *c)
    c->pipe->delete_rasterizer_state(c->pipe, c->rast);
 }
 
+static bool
+create_vertex_buffer(struct vl_compositor *c)
+{
+   assert(c);
+
+   pipe_resource_reference(&c->vertex_buf.buffer, NULL);
+   c->vertex_buf.buffer = pipe_buffer_create
+   (
+      c->pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      PIPE_USAGE_STREAM,
+      sizeof(struct vertex4f) * VL_COMPOSITOR_MAX_LAYERS * 4
+   );
+   return c->vertex_buf.buffer != NULL;
+}
+
 static bool
 init_buffers(struct vl_compositor *c)
 {
@@ -314,13 +331,7 @@ init_buffers(struct vl_compositor *c)
     */
    c->vertex_buf.stride = sizeof(struct vertex4f);
    c->vertex_buf.buffer_offset = 0;
-   c->vertex_buf.buffer = pipe_buffer_create
-   (
-      c->pipe->screen,
-      PIPE_BIND_VERTEX_BUFFER,
-      PIPE_USAGE_STREAM,
-      sizeof(struct vertex4f) * (VL_COMPOSITOR_MAX_LAYERS + 1) * 4
-   );
+   create_vertex_buffer(c);
 
    vertex_elems[0].src_offset = 0;
    vertex_elems[0].instance_divisor = 0;
@@ -431,13 +442,30 @@ gen_vertex_data(struct vl_compositor *c)
                         PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD | PIPE_TRANSFER_DONTBLOCK,
                         &buf_transfer);
 
-   if (!vb)
-      return;
+   if (!vb) {
+      // If buffer is still locked from last draw create a new one
+      create_vertex_buffer(c);
+      vb = pipe_buffer_map(c->pipe, c->vertex_buf.buffer,
+                           PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+                           &buf_transfer);
+   }
 
    for (i = 0; i < VL_COMPOSITOR_MAX_LAYERS; i++) {
       if (c->used_layers & (1 << i)) {
-         gen_rect_verts(vb, &c->layers[i]);
+         struct vl_compositor_layer *layer = &c->layers[i];
+         gen_rect_verts(vb, layer);
          vb += 4;
+
+         if (layer->clearing &&
+             c->dirty_tl.x >= layer->dst.tl.x &&
+             c->dirty_tl.y >= layer->dst.tl.y &&
+             c->dirty_br.x <= layer->dst.br.x &&
+             c->dirty_br.y <= layer->dst.br.y) {
+
+            // We clear the dirty area anyway, no need for clear_render_target
+            c->dirty_tl.x = c->dirty_tl.y = 1.0f;
+            c->dirty_br.x = c->dirty_br.y = 0.0f;
+         }
       }
    }
 
@@ -453,18 +481,36 @@ draw_layers(struct vl_compositor *c)
 
    for (i = 0, vb_index = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
       if (c->used_layers & (1 << i)) {
-         struct pipe_sampler_view **samplers = &c->layers[i].sampler_views[0];
+         struct vl_compositor_layer *layer = &c->layers[i];
+         struct pipe_sampler_view **samplers = &layer->sampler_views[0];
          unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
 
-         c->pipe->bind_fs_state(c->pipe, c->layers[i].fs);
-         c->pipe->bind_fragment_sampler_states(c->pipe, num_sampler_views, c->layers[i].samplers);
+         c->pipe->bind_fs_state(c->pipe, layer->fs);
+         c->pipe->bind_fragment_sampler_states(c->pipe, num_sampler_views, layer->samplers);
          c->pipe->set_fragment_sampler_views(c->pipe, num_sampler_views, samplers);
          util_draw_arrays(c->pipe, PIPE_PRIM_QUADS, vb_index * 4, 4);
          vb_index++;
+
+         // Remember the currently drawn area as dirty for the next draw command
+         c->dirty_tl.x = MIN2(layer->dst.tl.x, c->dirty_tl.x);
+         c->dirty_tl.y = MIN2(layer->dst.tl.y, c->dirty_tl.y);
+         c->dirty_br.x = MAX2(layer->dst.br.x, c->dirty_br.x);
+         c->dirty_br.y = MAX2(layer->dst.br.y, c->dirty_br.y);
       }
    }
 }
 
+static void
+vl_compositor_reset_dirty_area(struct pipe_video_compositor *compositor)
+{
+   struct vl_compositor *c = (struct vl_compositor *)compositor;
+
+   assert(compositor);
+
+   c->dirty_tl.x = c->dirty_tl.y = 0.0f;
+   c->dirty_br.x = c->dirty_br.y = 1.0f;
+}
+
 static void
 vl_compositor_clear_layers(struct pipe_video_compositor *compositor)
 {
@@ -532,6 +578,7 @@ vl_compositor_set_buffer_layer(struct pipe_video_compositor *compositor,
    assert(layer < VL_COMPOSITOR_MAX_LAYERS);
 
    c->used_layers |= 1 << layer;
+   c->layers[layer].clearing = true;
    c->layers[layer].fs = c->fs_video_buffer;
 
    sampler_views = buffer->get_sampler_view_components(buffer);
@@ -559,6 +606,7 @@ vl_compositor_set_palette_layer(struct pipe_video_compositor *compositor,
    assert(layer < VL_COMPOSITOR_MAX_LAYERS);
 
    c->used_layers |= 1 << layer;
+   c->layers[layer].clearing = false;
    c->layers[layer].fs = c->fs_palette;
    c->layers[layer].samplers[0] = c->sampler_linear;
    c->layers[layer].samplers[1] = c->sampler_nearest;
@@ -585,6 +633,7 @@ vl_compositor_set_rgba_layer(struct pipe_video_compositor *compositor,
    assert(layer < VL_COMPOSITOR_MAX_LAYERS);
 
    c->used_layers |= 1 << layer;
+   c->layers[layer].clearing = false;
    c->layers[layer].fs = c->fs_rgba;
    c->layers[layer].samplers[0] = c->sampler_linear;
    c->layers[layer].samplers[1] = NULL;
@@ -606,6 +655,7 @@ vl_compositor_render(struct pipe_video_compositor *compositor,
 {
    struct vl_compositor *c = (struct vl_compositor *)compositor;
    struct pipe_scissor_state scissor;
+   float clearcolor[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
 
    assert(compositor);
    assert(dst_surface);
@@ -631,6 +681,12 @@ vl_compositor_render(struct pipe_video_compositor *compositor,
 
    gen_vertex_data(c);
 
+   if (c->dirty_tl.x < c->dirty_br.x || c->dirty_tl.y < c->dirty_br.y) {
+      util_clear_render_target(c->pipe, dst_surface, clearcolor, 0, 0, dst_surface->width, dst_surface->height);
+      c->dirty_tl.x = c->dirty_tl.y = 1.0f;
+      c->dirty_br.x = c->dirty_br.y = 0.0f;
+   }
+
    c->pipe->set_scissor_state(c->pipe, &scissor);
    c->pipe->set_framebuffer_state(c->pipe, &c->fb_state);
    c->pipe->set_viewport_state(c->pipe, &c->viewport);
@@ -682,6 +738,7 @@ vl_compositor_init(struct pipe_video_context *vpipe, struct pipe_context *pipe)
 
    vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, csc_matrix);
    vl_compositor_set_csc_matrix(&compositor->base, csc_matrix);
+   vl_compositor_reset_dirty_area(&compositor->base);
 
    return &compositor->base;
 }
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 339ea415e8a..725dcc15a13 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -40,8 +40,11 @@ struct pipe_context;
 
 struct vl_compositor_layer
 {
+   bool clearing;
+
    void *fs;
    void *samplers[3];
+
    struct pipe_sampler_view *sampler_views[3];
    struct {
       struct vertex2f tl, br;
@@ -69,6 +72,8 @@ struct vl_compositor
    void *fs_palette;
    void *fs_rgba;
 
+   struct vertex2f dirty_tl, dirty_br;
+
    unsigned used_layers:VL_COMPOSITOR_MAX_LAYERS;
    struct vl_compositor_layer layers[VL_COMPOSITOR_MAX_LAYERS];
 };
-- 
cgit v1.2.3


From 221e1b7ababe67efe80f38f8ab2236be5cacfddf Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 27 Apr 2011 15:28:13 +0200
Subject: [g3dvl] make reset_dirty_area a public interface

---
 src/gallium/auxiliary/vl/vl_compositor.c       |  1 +
 src/gallium/include/pipe/p_video_context.h     |  5 +++++
 src/gallium/state_trackers/xorg/xvmc/surface.c | 16 +++++++++-------
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index f6f7b65fd6b..c13e69d53d1 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -713,6 +713,7 @@ vl_compositor_init(struct pipe_video_context *vpipe, struct pipe_context *pipe)
    compositor->base.context = vpipe;
    compositor->base.destroy = vl_compositor_destroy;
    compositor->base.set_csc_matrix = vl_compositor_set_csc_matrix;
+   compositor->base.reset_dirty_area = vl_compositor_reset_dirty_area;
    compositor->base.clear_layers = vl_compositor_clear_layers;
    compositor->base.set_buffer_layer = vl_compositor_set_buffer_layer;
    compositor->base.set_palette_layer = vl_compositor_set_palette_layer;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 41f2e320580..933b377a0c3 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -264,6 +264,11 @@ struct pipe_video_compositor
     */
    void (*set_csc_matrix)(struct pipe_video_compositor *compositor, const float mat[16]);
 
+   /**
+    * reset dirty area, so it's cleared with the clear colour
+    */
+   void (*reset_dirty_area)(struct pipe_video_compositor *compositor);
+
    /**
     * set overlay samplers
     */
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 7819fe0777b..da41a182164 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -500,20 +500,26 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    context = surface_priv->context;
    context_priv = context->privData;
 
+   assert(flags == XVMC_TOP_FIELD || flags == XVMC_BOTTOM_FIELD || flags == XVMC_FRAME_PICTURE);
+   assert(srcx + srcw - 1 < surface->width);
+   assert(srcy + srch - 1 < surface->height);
+
+   subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;
+   vpipe = context_priv->vctx->vpipe;
+   compositor = context_priv->compositor;
+
    if (!context_priv->drawable_surface ||
        context_priv->dst_rect.x != dst_rect.x || context_priv->dst_rect.y != dst_rect.y ||
        context_priv->dst_rect.w != dst_rect.w || context_priv->dst_rect.h != dst_rect.h) {
 
       context_priv->drawable_surface = vl_drawable_surface_get(context_priv->vctx, drawable);
       context_priv->dst_rect = dst_rect;
+      compositor->reset_dirty_area(compositor);
    }
 
    if (!context_priv->drawable_surface)
       return BadDrawable;
 
-   assert(flags == XVMC_TOP_FIELD || flags == XVMC_BOTTOM_FIELD || flags == XVMC_FRAME_PICTURE);
-   assert(srcx + srcw - 1 < surface->width);
-   assert(srcy + srch - 1 < surface->height);
    /*
     * Some apps (mplayer) hit these asserts because they call
     * this function after the window has been resized by the WM
@@ -526,10 +532,6 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    assert(desty + desth - 1 < drawable_surface->height);
     */
 
-   subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;
-   vpipe = context_priv->vctx->vpipe;
-   compositor = context_priv->compositor;
-
    unmap_and_flush_surface(surface_priv);
 
    compositor->clear_layers(compositor);
-- 
cgit v1.2.3


From 563f6c225c75b2344e0f4cd5011540a21fac9abf Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 27 Apr 2011 16:50:34 +0200
Subject: [g3dvl] make clear color configurable

---
 src/gallium/auxiliary/vl/vl_compositor.c   | 19 +++++++++++++++++--
 src/gallium/auxiliary/vl/vl_compositor.h   |  1 +
 src/gallium/include/pipe/p_video_context.h |  5 +++++
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index c13e69d53d1..cc257e5614f 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -511,6 +511,18 @@ vl_compositor_reset_dirty_area(struct pipe_video_compositor *compositor)
    c->dirty_br.x = c->dirty_br.y = 1.0f;
 }
 
+static void
+vl_compositor_set_clear_color(struct pipe_video_compositor *compositor, float color[4])
+{
+   struct vl_compositor *c = (struct vl_compositor *)compositor;
+   unsigned i;
+
+   assert(compositor);
+
+   for (i = 0; i < 4; ++i)
+      c->clear_color[i] = color[i];
+}
+
 static void
 vl_compositor_clear_layers(struct pipe_video_compositor *compositor)
 {
@@ -655,7 +667,6 @@ vl_compositor_render(struct pipe_video_compositor *compositor,
 {
    struct vl_compositor *c = (struct vl_compositor *)compositor;
    struct pipe_scissor_state scissor;
-   float clearcolor[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
 
    assert(compositor);
    assert(dst_surface);
@@ -682,7 +693,7 @@ vl_compositor_render(struct pipe_video_compositor *compositor,
    gen_vertex_data(c);
 
    if (c->dirty_tl.x < c->dirty_br.x || c->dirty_tl.y < c->dirty_br.y) {
-      util_clear_render_target(c->pipe, dst_surface, clearcolor, 0, 0, dst_surface->width, dst_surface->height);
+      util_clear_render_target(c->pipe, dst_surface, c->clear_color, 0, 0, dst_surface->width, dst_surface->height);
       c->dirty_tl.x = c->dirty_tl.y = 1.0f;
       c->dirty_br.x = c->dirty_br.y = 0.0f;
    }
@@ -714,6 +725,7 @@ vl_compositor_init(struct pipe_video_context *vpipe, struct pipe_context *pipe)
    compositor->base.destroy = vl_compositor_destroy;
    compositor->base.set_csc_matrix = vl_compositor_set_csc_matrix;
    compositor->base.reset_dirty_area = vl_compositor_reset_dirty_area;
+   compositor->base.set_clear_color = vl_compositor_set_clear_color;
    compositor->base.clear_layers = vl_compositor_clear_layers;
    compositor->base.set_buffer_layer = vl_compositor_set_buffer_layer;
    compositor->base.set_palette_layer = vl_compositor_set_palette_layer;
@@ -739,6 +751,9 @@ vl_compositor_init(struct pipe_video_context *vpipe, struct pipe_context *pipe)
 
    vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, csc_matrix);
    vl_compositor_set_csc_matrix(&compositor->base, csc_matrix);
+
+   compositor->clear_color[0] = compositor->clear_color[1] = 0.0f;
+   compositor->clear_color[2] = compositor->clear_color[3] = 0.0f;
    vl_compositor_reset_dirty_area(&compositor->base);
 
    return &compositor->base;
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 725dcc15a13..33d2a20733d 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -72,6 +72,7 @@ struct vl_compositor
    void *fs_palette;
    void *fs_rgba;
 
+   float clear_color[4];
    struct vertex2f dirty_tl, dirty_br;
 
    unsigned used_layers:VL_COMPOSITOR_MAX_LAYERS;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 933b377a0c3..80bbb6e1376 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -269,6 +269,11 @@ struct pipe_video_compositor
     */
    void (*reset_dirty_area)(struct pipe_video_compositor *compositor);
 
+   /**
+    * set the clear color
+    */
+   void (*set_clear_color)(struct pipe_video_compositor *compositor, float color[4]);
+
    /**
     * set overlay samplers
     */
-- 
cgit v1.2.3


From 2471acfc4c051d480861265011fa5e77fad02887 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 27 Apr 2011 20:38:31 +0200
Subject: vdpau: start to implement MPEG2 decoder part

---
 src/gallium/state_trackers/vdpau/decode.c        | 256 ++++++++---------------
 src/gallium/state_trackers/vdpau/mixer.c         |  12 ++
 src/gallium/state_trackers/vdpau/vdpau_private.h |  11 +-
 3 files changed, 98 insertions(+), 181 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 12cc6c7b42f..d8111a8243f 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -25,13 +25,14 @@
  *
  **************************************************************************/
 
-#include "vdpau_private.h"
-#include "mpeg2_bitstream_parser.h"
+#include <pipe/p_video_context.h>
+
 #include <util/u_memory.h>
 #include <util/u_math.h>
-#include <pipe/p_video_context.h>
 #include <util/u_debug.h>
 
+#include "vdpau_private.h"
+
 VdpStatus
 vlVdpDecoderCreate(VdpDevice device,
                    VdpDecoderProfile profile,
@@ -39,12 +40,11 @@ vlVdpDecoderCreate(VdpDevice device,
                    uint32_t max_references,
                    VdpDecoder *decoder)
 {
-   return VDP_STATUS_NO_IMPLEMENTATION;
-
-#if 0
-   enum pipe_video_profile p_profile = PIPE_VIDEO_PROFILE_UNKNOWN;
-   VdpStatus ret = VDP_STATUS_OK;
-   vlVdpDecoder *vldecoder = NULL;
+   enum pipe_video_profile p_profile;
+   struct pipe_video_context *vpipe;
+   vlVdpDevice *dev;
+   vlVdpDecoder *vldecoder;
+   VdpStatus ret;
 
    debug_printf("[VDPAU] Creating decoder\n");
 
@@ -54,197 +54,134 @@ vlVdpDecoderCreate(VdpDevice device,
    if (!(width && height))
       return VDP_STATUS_INVALID_VALUE;
 
-   vlVdpDevice *dev = vlGetDataHTAB(device);
-   if (!dev) {
-      ret = VDP_STATUS_INVALID_HANDLE;
-      goto inv_device;
-   }
+   p_profile = ProfileToPipe(profile);
+   if (p_profile == PIPE_VIDEO_PROFILE_UNKNOWN)
+      return VDP_STATUS_INVALID_DECODER_PROFILE;
+
+   dev = vlGetDataHTAB(device);
+   if (!dev)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   vpipe = dev->context->vpipe;
 
    vldecoder = CALLOC(1,sizeof(vlVdpDecoder));
-   if (!vldecoder) {
-      ret = VDP_STATUS_RESOURCES;
-      goto no_decoder;
-   }
+   if (!vldecoder)
+      return VDP_STATUS_RESOURCES;
 
-   p_profile = ProfileToPipe(profile);
-   if (p_profile == PIPE_VIDEO_PROFILE_UNKNOWN)	{
-      ret = VDP_STATUS_INVALID_DECODER_PROFILE;
-      goto inv_profile;
-   }
+   vldecoder->device = dev;
 
    // TODO: Define max_references. Used mainly for H264
+   vldecoder->decoder = vpipe->create_decoder
+   (
+      vpipe, p_profile,
+      PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+      PIPE_VIDEO_CHROMA_FORMAT_420,
+      width, height
+   );
+   if (!vldecoder->decoder) {
+      ret = VDP_STATUS_ERROR;
+      goto error_decoder;
+   }
 
-   vldecoder->profile = p_profile;
-   vldecoder->height = height;
-   vldecoder->width = width;
-   vldecoder->device = dev;
-   vldecoder->vctx = NULL;
+   vldecoder->buffer = vldecoder->decoder->create_buffer(vldecoder->decoder);
+   if (!vldecoder->buffer) {
+      ret = VDP_STATUS_ERROR;
+      goto error_buffer;
+   }
 
    *decoder = vlAddDataHTAB(vldecoder);
    if (*decoder == 0) {
       ret = VDP_STATUS_ERROR;
-      goto no_handle;
+      goto error_handle;
    }
+
    debug_printf("[VDPAU] Decoder created succesfully\n");
 
    return VDP_STATUS_OK;
 
-no_handle:
+error_handle:
+   vldecoder->buffer->destroy(vldecoder->buffer);
+
+error_buffer:
+   vldecoder->decoder->destroy(vldecoder->decoder);
+
+error_decoder:
    FREE(vldecoder);
-   inv_profile:
-no_screen:
-no_decoder:
-inv_device:
-    return ret;
-#endif
+   return ret;
 }
 
 VdpStatus
 vlVdpDecoderDestroy(VdpDecoder decoder)
 {
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   vlVdpDecoder *vldecoder;
 
-#if 0
    debug_printf("[VDPAU] Destroying decoder\n");
-   vlVdpDecoder *vldecoder;
 
    vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
-   if (!vldecoder) {
+   if (!vldecoder)
       return VDP_STATUS_INVALID_HANDLE;
-   }
-
-   if (vldecoder->vctx) {
-      if (vldecoder->vctx->vscreen)
-         vl_screen_destroy(vldecoder->vctx->vscreen);
-   }
 
-   if (vldecoder->vctx)
-       vl_video_destroy(vldecoder->vctx);
+   vldecoder->buffer->destroy(vldecoder->buffer);
+   vldecoder->decoder->destroy(vldecoder->decoder);
 
    FREE(vldecoder);
 
    return VDP_STATUS_OK;
-#endif
 }
 
-#if 0
 VdpStatus
-vlVdpCreateSurfaceTarget(vlVdpDecoder *vldecoder, vlVdpSurface *vlsurf)
+vlVdpDecoderGetParameters(VdpDecoder decoder,
+                          VdpDecoderProfile *profile,
+                          uint32_t *width,
+                          uint32_t *height)
 {
-   struct pipe_surface surf_template;
-   struct pipe_resource tmplt;
-   struct pipe_resource *surf_tex;
-   struct pipe_video_context *vctx;
-
-   debug_printf("[VDPAU] Creating surface\n");
-
-   if(!(vldecoder && vlsurf))
-      return VDP_STATUS_INVALID_POINTER;
-
-   vctx = vldecoder->vctx->vpipe;
-
-   if (!vctx->is_format_supported(vctx, tmplt.format, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
-      return VDP_STATUS_RESOURCES;
-
-   memset(&tmplt, 0, sizeof(struct pipe_resource));
-   tmplt.target = PIPE_TEXTURE_2D;
-   // TODO
-   //tmplt.format = vctx->get_param(vctx,PIPE_CAP_DECODE_TARGET_PREFERRED_FORMAT);
-   tmplt.last_level = 0;
-   tmplt.width0 = vlsurf->width;
-   tmplt.height0 = vlsurf->height;
-   tmplt.depth0 = 1;
-   tmplt.usage = PIPE_USAGE_DEFAULT;
-   tmplt.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   tmplt.flags = 0;
-
-   surf_tex = vctx->screen->resource_create(vctx->screen, &tmplt);
-
-   memset(&surf_template, 0, sizeof(surf_template));
-   surf_template.format = surf_tex->format;
-   surf_template.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   //vlsurf->psurface = vctx->create_surface(vctx->screen, surf_tex, &surf_template);
-
-   pipe_resource_reference(&surf_tex, NULL);
-
-   //if (!vlsurf->psurface)
-   //   return VDP_STATUS_RESOURCES;
-   debug_printf("[VDPAU] Done creating surface\n");
-
    return VDP_STATUS_OK;
 }
 
-VdpStatus
-vlVdpDecoderRenderMpeg2(vlVdpDecoder *vldecoder,
-                        vlVdpSurface *vlsurf,
+static VdpStatus
+vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder,
+                        struct pipe_video_decode_buffer *buffer,
+                        struct pipe_video_buffer *target,
                         VdpPictureInfoMPEG1Or2 *picture_info,
                         uint32_t bitstream_buffer_count,
                         VdpBitstreamBuffer const *bitstream_buffers)
 {
-   struct pipe_video_context *vpipe;
-   vlVdpSurface *t_vdp_surf;
-   vlVdpSurface *p_vdp_surf;
-   vlVdpSurface *f_vdp_surf;
-   struct pipe_surface *t_surf;
-   struct pipe_surface *p_surf;
-   struct pipe_surface *f_surf;
-   uint32_t num_macroblocks;
-   struct pipe_mpeg12_macroblock *pipe_macroblocks;
-   VdpStatus ret;
+   struct pipe_video_buffer *ref_frames[2];
 
    debug_printf("[VDPAU] Decoding MPEG2\n");
 
-   t_vdp_surf = vlsurf;
-
    /* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
    if (picture_info->backward_reference ==  VDP_INVALID_HANDLE)
-      p_vdp_surf = NULL;
+      ref_frames[0] = NULL;
    else {
-      p_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference);
-      if (!p_vdp_surf)
+      ref_frames[0] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer;
+      if (!ref_frames[0])
          return VDP_STATUS_INVALID_HANDLE;
    }
 
    if (picture_info->forward_reference ==  VDP_INVALID_HANDLE)
-      f_vdp_surf = NULL;
+      ref_frames[1] = NULL;
    else {
-      f_vdp_surf = (vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference);
-      if (!f_vdp_surf)
+      ref_frames[1] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer;
+      if (!ref_frames[1])
          return VDP_STATUS_INVALID_HANDLE;
    }
 
-   if (f_vdp_surf ==  VDP_INVALID_HANDLE) f_vdp_surf = NULL;
-
-   ret = vlVdpCreateSurfaceTarget(vldecoder,t_vdp_surf);
-
-   vpipe = vldecoder->vctx->vpipe;
-
-   if (vlVdpMPEG2BitstreamToMacroblock(vpipe->screen, bitstream_buffers, bitstream_buffer_count,
-                                       &num_macroblocks, &pipe_macroblocks))
-   {
-      debug_printf("[VDPAU] Error in frame-header. Skipping.\n");
-
-      ret = VDP_STATUS_OK;
-      goto skip_frame;
-   }
+   //if (vlVdpMPEG2BitstreamToMacroblock(vpipe->screen, bitstream_buffers, bitstream_buffer_count,
+   //                                    &num_macroblocks, &pipe_macroblocks))
+   //{
+   //   debug_printf("[VDPAU] Error in frame-header. Skipping.\n");
+   //
+   //   ret = VDP_STATUS_OK;
+   //   goto skip_frame;
+   //}
 
    // TODO
    //vpipe->set_decode_target(vpipe,t_surf);
    //vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks,
    //                          (struct pipe_macroblock *)pipe_macroblocks, NULL);
 
-   skip_frame:
-   return ret;
-}
-#endif
-
-VdpStatus
-vlVdpDecoderGetParameters(VdpDecoder decoder,
-                          VdpDecoderProfile *profile,
-                          uint32_t *width,
-                          uint32_t *height)
-{
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
@@ -254,13 +191,8 @@ vlVdpDecoderRender(VdpDecoder decoder,
                    uint32_t bitstream_buffer_count,
                    VdpBitstreamBuffer const *bitstream_buffers)
 {
-   return VDP_STATUS_NO_IMPLEMENTATION;
-
-#if 0
    vlVdpDecoder *vldecoder;
    vlVdpSurface *vlsurf;
-   struct vl_screen *vscreen;
-   VdpStatus ret;
 
    debug_printf("[VDPAU] Decoding\n");
 
@@ -278,42 +210,20 @@ vlVdpDecoderRender(VdpDecoder decoder,
    if (vlsurf->device != vldecoder->device)
       return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
 
-   /* Test doesn't make sence */
-   /*if (vlsurf->chroma_format != vldecoder->chroma_format)
-   return VDP_STATUS_INVALID_CHROMA_TYPE;*/
-
-   vscreen = vl_screen_create(vldecoder->device->display, vldecoder->device->screen);
-   if (!vscreen)
-      return VDP_STATUS_RESOURCES;
-
-   vldecoder->vctx = vl_video_create(vscreen, vldecoder->profile, vlsurf->chroma_format, vldecoder->width, vldecoder->height);
-   if (!vldecoder->vctx)
-      return VDP_STATUS_RESOURCES;
+   if (vlsurf->video_buffer->chroma_format != vldecoder->decoder->chroma_format)
+      // TODO: Recreate decoder with correct chroma
+      return VDP_STATUS_INVALID_CHROMA_TYPE;
 
    // TODO: Right now only mpeg2 is supported.
-   switch (vldecoder->vctx->vpipe->profile)   {
+   switch (vldecoder->decoder->profile)   {
    case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
    case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
-      ret = vlVdpDecoderRenderMpeg2(vldecoder,vlsurf,(VdpPictureInfoMPEG1Or2 *)picture_info,
-                                    bitstream_buffer_count,bitstream_buffers);
+      return vlVdpDecoderRenderMpeg2(vldecoder->decoder, vldecoder->buffer,
+                                     vlsurf, (VdpPictureInfoMPEG1Or2 *)picture_info,
+                                     bitstream_buffer_count,bitstream_buffers);
       break;
+
    default:
       return VDP_STATUS_INVALID_DECODER_PROFILE;
    }
-   assert(0);
-
-   return ret;
-#endif
-}
-
-VdpStatus
-vlVdpGenerateCSCMatrix(VdpProcamp *procamp,
-                       VdpColorStandard standard,
-                       VdpCSCMatrix *csc_matrix)
-{
-   debug_printf("[VDPAU] Generating CSCMatrix\n");
-   if (!(csc_matrix && procamp))
-      return VDP_STATUS_INVALID_POINTER;
-
-   return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index b02d591eb05..0c0d200c6d4 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -222,3 +222,15 @@ vlVdpVideoMixerGetAttributeValues(VdpVideoMixer mixer,
 {
    return VDP_STATUS_NO_IMPLEMENTATION;
 }
+
+VdpStatus
+vlVdpGenerateCSCMatrix(VdpProcamp *procamp,
+                       VdpColorStandard standard,
+                       VdpCSCMatrix *csc_matrix)
+{
+   debug_printf("[VDPAU] Generating CSCMatrix\n");
+   if (!(csc_matrix && procamp))
+      return VDP_STATUS_INVALID_POINTER;
+
+   return VDP_STATUS_OK;
+}
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 40d26511cf7..5931a2dda34 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -160,10 +160,8 @@ ProfileToPipe(VdpDecoderProfile vdpau_profile)
       case VDP_DECODER_PROFILE_H264_HIGH:
 	     return PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH;
       default:
-         PIPE_VIDEO_PROFILE_UNKNOWN;
+         return PIPE_VIDEO_PROFILE_UNKNOWN;
    }
-
-   return -1;
 }
 
 typedef struct
@@ -209,11 +207,8 @@ typedef struct
 typedef struct
 {
    vlVdpDevice *device;
-   struct vl_context *vctx;
-   enum pipe_video_chroma_format chroma_format;
-   enum pipe_video_profile profile;
-   uint32_t width;
-   uint32_t height;
+   struct pipe_video_decoder *decoder;
+   struct pipe_video_decode_buffer *buffer;
 } vlVdpDecoder;
 
 typedef uint32_t vlHandle;
-- 
cgit v1.2.3


From f6f773189b1fd315416470a91081f6ec0261dad7 Mon Sep 17 00:00:00 2001
From: Johannes Obermayr <johannesobermayr@gmx.de>
Date: Wed, 27 Apr 2011 18:51:59 +0200
Subject: [g3dvl] Fix compile error.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 src/gallium/drivers/nvfx/nvfx_video_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.c b/src/gallium/drivers/nvfx/nvfx_video_context.c
index 9c9ccfe317d..ff9931b5409 100644
--- a/src/gallium/drivers/nvfx/nvfx_video_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_video_context.c
@@ -40,5 +40,5 @@ nvfx_video_create(struct pipe_screen *screen, void *priv)
    if (!pipe)
       return NULL;
 
-   return vl_create_context(pipe, profile, true);
+   return vl_create_context(pipe, true);
 }
-- 
cgit v1.2.3


From f20608b951dd629ecc1fceb111a876c1c70c57a9 Mon Sep 17 00:00:00 2001
From: Johannes Obermayr <johannesobermayr@gmx.de>
Date: Thu, 28 Apr 2011 11:15:15 +0200
Subject: [g3dvl] Name of pkg-config file is libva.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 src/gallium/state_trackers/va/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/va/Makefile b/src/gallium/state_trackers/va/Makefile
index dd303ebace9..d5b3ec3caf2 100644
--- a/src/gallium/state_trackers/va/Makefile
+++ b/src/gallium/state_trackers/va/Makefile
@@ -8,7 +8,7 @@ VA_MINOR = 3
 LIBRARY_DEFINES = -DVER_MAJOR=$(VA_MAJOR) -DVER_MINOR=$(VA_MINOR) $(STATE_TRACKER_DEFINES)
 
 LIBRARY_INCLUDES = \
-	$(shell pkg-config --cflags-only-I va) \
+	$(shell pkg-config --cflags-only-I libva) \
 	-I$(TOP)/src/gallium/winsys/g3dvl
 
 C_SOURCES = htab.c \
-- 
cgit v1.2.3


From c888fe027c338f337123de4da2de1ac73b0f7587 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 4 May 2011 18:56:32 +0200
Subject: [g3dvl] Implement MPEG2 VLD

Based upon xine's slice_xvmc.c.
This gets VDPAU up and running.
---
 src/gallium/auxiliary/Makefile                 |    1 +
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 1962 ++++++++++++++++++++++++
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h |   59 +
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |   28 +
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h   |    2 +
 src/gallium/auxiliary/vl/vl_vlc.h              |  138 ++
 src/gallium/include/pipe/p_video_context.h     |    7 +-
 src/gallium/include/pipe/p_video_state.h       |   32 +-
 src/gallium/state_trackers/vdpau/decode.c      |   52 +-
 9 files changed, 2246 insertions(+), 35 deletions(-)
 create mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
 create mode 100644 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
 create mode 100644 src/gallium/auxiliary/vl/vl_vlc.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 428e097be09..3fd1d5923c0 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -153,6 +153,7 @@ C_SOURCES = \
 	vl/vl_mpeg12_decoder.c \
 	vl/vl_compositor.c \
 	vl/vl_csc.c \
+	vl/vl_mpeg12_bitstream.c \
 	vl/vl_zscan.c \
         vl/vl_idct.c \
 	vl/vl_mc.c \
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
new file mode 100644
index 00000000000..8955ad5175b
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -0,0 +1,1962 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * This file is based upon slice_xvmc.c and vlc.h from the xine project,
+ * which in turn is based on mpeg2dec. The following is the original copyright:
+ *
+ * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <stdint.h>
+
+#include <pipe/p_video_state.h>
+
+#include "vl_vlc.h"
+#include "vl_mpeg12_bitstream.h"
+
+/* take the top num bits of the 32-bit value buf and zero extend them; num must be 1..32 (a shift by 32 is undefined) */
+#define UBITS(buf,num) (((uint32_t)(buf)) >> (32 - (num)))
+
+/* take the top num bits of buf and sign extend them; relies on >> of a negative int32_t being an arithmetic shift */
+#define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num)))
+/* clamp val in place to [-2048, 2047]; val is expanded unparenthesized and more than once, so pass a plain variable */
+#define SATURATE(val)			\
+do {					\
+   if ((uint32_t)(val + 2048) > 4095)	\
+      val = (val > 0) ? 2047 : -2048;	\
+} while (0)
+
+/* macroblock modes: bit flags of the value returned by get_macroblock_modes() */
+#define MACROBLOCK_INTRA 1
+#define MACROBLOCK_PATTERN 2
+#define MACROBLOCK_MOTION_BACKWARD 4
+#define MACROBLOCK_MOTION_FORWARD 8
+#define MACROBLOCK_QUANT 16
+#define DCT_TYPE_INTERLACED 32 /* OR-ed into the same mode word when the 1-bit dct type is coded and set */
+
+/* motion_type: stored in bits 6-7 of the same mode word */
+#define MOTION_TYPE_MASK (3*64) /* 0xc0 */
+#define MOTION_TYPE_BASE 64 /* get_macroblock_modes() multiplies the 2-bit motion type by this */
+#define MC_FIELD (1*64)
+#define MC_FRAME (2*64) /* same numeric value as MC_16X8 */
+#define MC_16X8 (2*64) /* same numeric value as MC_FRAME */
+#define MC_DMV (3*64)
+
+/* picture structure (compared against picture->picture_structure) */
+#define TOP_FIELD     1
+#define BOTTOM_FIELD  2
+#define FRAME_PICTURE 3
+
+/* picture coding type (mpeg2 header), compared against picture->picture_coding_type */
+#define I_TYPE 1
+#define P_TYPE 2
+#define B_TYPE 3
+#define D_TYPE 4
+
+typedef struct { /* macroblock type code entry (MB_I, MB_P, MB_B) */
+   uint8_t modes; /* MACROBLOCK_* flags for this code */
+   uint8_t len; /* bit count the caller passes to vl_vlc_dumpbits() for this code */
+} MBtab;
+
+typedef struct { /* motion vector code entry (MV_4, MV_10) */
+   uint8_t delta; /* get_motion_delta() computes (delta << f_code) + 1 from this */
+   uint8_t len; /* bit count of the code; the sign bit is accounted for separately */
+} MVtab;
+
+typedef struct { /* dual-prime differential code entry (DMV_2) */
+   int8_t dmv; /* value returned by get_dmv() */
+   uint8_t len; /* bit count of the code */
+} DMVtab;
+
+typedef struct { /* coded block pattern code entry (CBP_7, CBP_9) */
+   uint8_t cbp; /* value returned by get_coded_block_pattern() */
+   uint8_t len; /* bit count of the code */
+} CBPtab;
+
+typedef struct { /* DC size code entry (DC_lum_5, DC_chrom_5, DC_long) */
+   uint8_t size; /* number of differential bits that follow the size code */
+   uint8_t len; /* bit count of the size code itself */
+} DCtab;
+
+typedef struct { /* DCT coefficient code entry (DCT_* tables) */
+   uint8_t run; /* added to the coefficient index; values pushing the index to 64 or more mark escape / end of block */
+   uint8_t level; /* unsigned level, scaled by quantizer_scale and the quantizer matrix */
+   uint8_t len; /* bit count of the code without its trailing sign bit */
+} DCTtab;
+
+typedef struct { /* entry type of MBA_5 / MBA_11; not used by the functions visible in this part of the file */
+   uint8_t mba;
+   uint8_t len;
+} MBAtab;
+
+#define INTRA MACROBLOCK_INTRA /* short aliases used only to build the MB_* tables; all are #undef'd after MB_B */
+#define QUANT MACROBLOCK_QUANT
+#define MC MACROBLOCK_MOTION_FORWARD
+#define CODED MACROBLOCK_PATTERN
+#define FWD MACROBLOCK_MOTION_FORWARD
+#define BWD MACROBLOCK_MOTION_BACKWARD
+#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD /* not parenthesized: only safe because it is combined with '|' below */
+/* I pictures: get_macroblock_modes() indexes this with the next 1 bit */
+static const MBtab MB_I [] = {
+   {INTRA|QUANT, 2}, {INTRA, 1}
+};
+/* P pictures: indexed with the next 5 bits (32 entries) */
+static const MBtab MB_P [] = {
+   {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA,    5},
+   {MC,          3}, {MC,          3}, {MC,             3}, {MC,       3},
+   {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
+   {CODED,       2}, {CODED,       2}, {CODED,          2}, {CODED,    2},
+   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1},
+   {MC|CODED,    1}, {MC|CODED,    1}, {MC|CODED,       1}, {MC|CODED, 1}
+};
+/* B pictures: indexed with the next 6 bits (64 entries); entry 0 has no modes and len 0 */
+static const MBtab MB_B [] = {
+   {0,                 0}, {INTRA|QUANT,       6},
+   {BWD|CODED|QUANT,   6}, {FWD|CODED|QUANT,   6},
+   {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5},
+                                     {INTRA,       5}, {INTRA,       5},
+   {FWD,         4}, {FWD,         4}, {FWD,         4}, {FWD,         4},
+   {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4}, {FWD|CODED,   4},
+   {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
+   {BWD,         3}, {BWD,         3}, {BWD,         3}, {BWD,         3},
+   {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
+   {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3}, {BWD|CODED,   3},
+   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+   {INTER,       2}, {INTER,       2}, {INTER,       2}, {INTER,       2},
+   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2},
+   {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}
+};
+
+#undef INTRA
+#undef QUANT
+#undef MC
+#undef CODED
+#undef FWD
+#undef BWD
+#undef INTER
+
+static const MVtab MV_4 [] = { /* get_motion_delta(): indexed by the next 4 bits when 0x0c000000 <= buf < 0x80000000 (index 0..7) */
+   { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2}
+};
+/* get_motion_delta(): indexed by the next 10 bits when buf < 0x0c000000, so the index stays below 48 */
+static const MVtab MV_10 [] = {
+   { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10},
+   { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10},
+   {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9},
+   { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7},
+   { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7},
+   { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}
+};
+/* get_dmv(): indexed by the next 2 bits */
+static const DMVtab DMV_2 [] = {
+   { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2}
+};
+
+static const CBPtab CBP_7 [] = { /* get_coded_block_pattern(): indexed by (next 7 bits) - 16 when buf >= 0x20000000 (112 entries) */
+   {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7},
+   {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7},
+   {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6},
+   {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6},
+   {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5},
+   {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5},
+   {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5},
+   {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5},
+   {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5},
+   {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5},
+   {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5},
+   {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5},
+   {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5},
+   {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5},
+   {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5},
+   {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5},
+   {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
+   {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4},
+   {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
+   {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4},
+   {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+   {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4},
+   {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+   {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4},
+   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3},
+   {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}
+};
+/* get_coded_block_pattern(): indexed by the next 9 bits when buf < 0x20000000 (index below 64); entry 0 has len 0 */
+static const CBPtab CBP_9 [] = {
+   {0,    0}, {0x00, 9}, {0x27, 9}, {0x1b, 9},
+   {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9},
+   {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8},
+   {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8},
+   {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8},
+   {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8},
+   {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8},
+   {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8},
+   {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8},
+   {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8},
+   {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8},
+   {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8},
+   {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8},
+   {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8},
+   {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8},
+   {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8}
+};
+
+static const DCtab DC_lum_5 [] = { /* get_luma_dc_dct_diff(): indexed by the next 5 bits when buf < 0xf8000000 (index 0..30) */
+   {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+   {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+   {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3},
+   {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5}
+};
+/* get_chroma_dc_dct_diff(): indexed by the next 5 bits when buf < 0xf8000000 (index 0..30) */
+static const DCtab DC_chrom_5 [] = {
+   {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2},
+   {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2},
+   {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
+   {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5}
+};
+/* shared long codes: luma indexes with (9 bits) - 0x1e0 and dumps len; chroma indexes with (10 bits) - 0x3e0 and dumps len + 1 */
+static const DCtab DC_long [] = {
+   {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
+   {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5},
+   {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6},
+   {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9}
+};
+
+static const DCTtab DCT_16 [] = { /* indexed by the next 16 bits when buf < 0x00200000; len is 0 because the callers shift the 16 bits out themselves; run 129 always ends the block */
+   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+   {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0},
+   {  2,18, 0}, {  2,17, 0}, {  2,16, 0}, {  2,15, 0},
+   {  7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0},
+   { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0},
+   { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0}
+};
+/* indexed by (next 15 bits) - 16 when 0x00200000 <= buf < 0x00800000 */
+static const DCTtab DCT_15 [] = {
+   {  1,40,15}, {  1,39,15}, {  1,38,15}, {  1,37,15},
+   {  1,36,15}, {  1,35,15}, {  1,34,15}, {  1,33,15},
+   {  1,32,15}, {  2,14,15}, {  2,13,15}, {  2,12,15},
+   {  2,11,15}, {  2,10,15}, {  2, 9,15}, {  2, 8,15},
+   {  1,31,14}, {  1,31,14}, {  1,30,14}, {  1,30,14},
+   {  1,29,14}, {  1,29,14}, {  1,28,14}, {  1,28,14},
+   {  1,27,14}, {  1,27,14}, {  1,26,14}, {  1,26,14},
+   {  1,25,14}, {  1,25,14}, {  1,24,14}, {  1,24,14},
+   {  1,23,14}, {  1,23,14}, {  1,22,14}, {  1,22,14},
+   {  1,21,14}, {  1,21,14}, {  1,20,14}, {  1,20,14},
+   {  1,19,14}, {  1,19,14}, {  1,18,14}, {  1,18,14},
+   {  1,17,14}, {  1,17,14}, {  1,16,14}, {  1,16,14}
+};
+/* indexed by (next 13 bits) - 16 when 0x00800000 <= buf < 0x02000000 */
+static const DCTtab DCT_13 [] = {
+   { 11, 2,13}, { 10, 2,13}, {  6, 3,13}, {  4, 4,13},
+   {  3, 5,13}, {  2, 7,13}, {  2, 6,13}, {  1,15,13},
+   {  1,14,13}, {  1,13,13}, {  1,12,13}, { 27, 1,13},
+   { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13},
+   {  1,11,12}, {  1,11,12}, {  9, 2,12}, {  9, 2,12},
+   {  5, 3,12}, {  5, 3,12}, {  1,10,12}, {  1,10,12},
+   {  3, 4,12}, {  3, 4,12}, {  8, 2,12}, {  8, 2,12},
+   { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12},
+   {  1, 9,12}, {  1, 9,12}, { 20, 1,12}, { 20, 1,12},
+   { 19, 1,12}, { 19, 1,12}, {  2, 5,12}, {  2, 5,12},
+   {  4, 3,12}, {  4, 3,12}, {  1, 8,12}, {  1, 8,12},
+   {  7, 2,12}, {  7, 2,12}, { 18, 1,12}, { 18, 1,12}
+};
+
+static const DCTtab DCT_B14_10 [] = { /* get_intra_block_B14(): indexed by (next 10 bits) - 8 when 0x02000000 <= buf < 0x04000000 */
+   { 17, 1,10}, {  6, 2,10}, {  1, 7,10}, {  3, 3,10},
+   {  2, 4,10}, { 16, 1,10}, { 15, 1,10}, {  5, 2,10}
+};
+/* get_intra_block_B14(): indexed by (next 8 bits) - 4 when 0x04000000 <= buf < 0x28000000; run 65 sends the decoder to its escape path */
+static const DCTtab DCT_B14_8 [] = {
+   { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
+   {  3, 2, 7}, {  3, 2, 7}, { 10, 1, 7}, { 10, 1, 7},
+   {  1, 4, 7}, {  1, 4, 7}, {  9, 1, 7}, {  9, 1, 7},
+   {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6}, {  8, 1, 6},
+   {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6}, {  7, 1, 6},
+   {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6}, {  2, 2, 6},
+   {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
+   { 14, 1, 8}, {  1, 6, 8}, { 13, 1, 8}, { 12, 1, 8},
+   {  4, 2, 8}, {  2, 3, 8}, {  1, 5, 8}, { 11, 1, 8}
+};
+/* get_intra_block_B14(): indexed by (next 5 bits) - 5 when buf >= 0x28000000; run 129 is the 2-bit end-of-block code */
+static const DCTtab DCT_B14AC_5 [] = {
+                {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
+   {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
+   {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}
+};
+/* same layout as DCT_B14AC_5 but with {1, 1, 1} in place of the end-of-block entries; not referenced by the functions visible in this part of the file */
+static const DCTtab DCT_B14DC_5 [] = {
+                {  1, 3, 5}, {  5, 1, 5}, {  4, 1, 5},
+   {  1, 2, 4}, {  1, 2, 4}, {  3, 1, 4}, {  3, 1, 4},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1},
+   {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}, {  1, 1, 1}
+};
+
+static const DCTtab DCT_B15_10 [] = { /* get_intra_block_B15(): indexed by (next 10 bits) - 8 when 0x02000000 <= buf < 0x04000000 */
+   {  6, 2, 9}, {  6, 2, 9}, { 15, 1, 9}, { 15, 1, 9},
+   {  3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9}
+};
+/* get_intra_block_B15(): indexed by (next 8 bits) - 4 when buf >= 0x04000000; run 65 is the escape code, run 129 the 4-bit end-of-block code */
+static const DCTtab DCT_B15_8 [] = {
+   { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6},
+   {  8, 1, 7}, {  8, 1, 7}, {  9, 1, 7}, {  9, 1, 7},
+   {  7, 1, 7}, {  7, 1, 7}, {  3, 2, 7}, {  3, 2, 7},
+   {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6}, {  1, 7, 6},
+   {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6}, {  1, 6, 6},
+   {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6}, {  5, 1, 6},
+   {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6}, {  6, 1, 6},
+   {  2, 5, 8}, { 12, 1, 8}, {  1,11, 8}, {  1,10, 8},
+   { 14, 1, 8}, { 13, 1, 8}, {  4, 2, 8}, {  2, 4, 8},
+   {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
+   {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5}, {  3, 1, 5},
+   {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
+   {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5}, {  2, 2, 5},
+   {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
+   {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5}, {  4, 1, 5},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3}, {  2, 1, 3},
+   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+   {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4},
+   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+   {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4}, {  1, 3, 4},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2}, {  1, 1, 2},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3}, {  1, 2, 3},
+   {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
+   {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5}, {  1, 4, 5},
+   {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
+   {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5}, {  1, 5, 5},
+   { 10, 1, 7}, { 10, 1, 7}, {  2, 3, 7}, {  2, 3, 7},
+   { 11, 1, 7}, { 11, 1, 7}, {  1, 8, 7}, {  1, 8, 7},
+   {  1, 9, 7}, {  1, 9, 7}, {  1,12, 8}, {  1,13, 8},
+   {  3, 3, 8}, {  5, 2, 8}, {  1,14, 8}, {  1,15, 8}
+};
+
+static const MBAtab MBA_5 [] = { /* {mba, len} pairs; the lookup code is not in this part of the file. NOTE(review): the first row is two entries short, which suggests an offset index - confirm at the use site */
+                   {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4},
+   {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3},
+   {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1},
+   {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}
+};
+/* longer {mba, len} codes (len 7 to 11); the lookup code is not in this part of the file */
+static const MBAtab MBA_11 [] = {
+   {32, 11}, {31, 11}, {30, 11}, {29, 11},
+   {28, 11}, {27, 11}, {26, 11}, {25, 11},
+   {24, 11}, {23, 11}, {22, 11}, {21, 11},
+   {20, 10}, {20, 10}, {19, 10}, {19, 10},
+   {18, 10}, {18, 10}, {17, 10}, {17, 10},
+   {16, 10}, {16, 10}, {15, 10}, {15, 10},
+   {14,  8}, {14,  8}, {14,  8}, {14,  8},
+   {14,  8}, {14,  8}, {14,  8}, {14,  8},
+   {13,  8}, {13,  8}, {13,  8}, {13,  8},
+   {13,  8}, {13,  8}, {13,  8}, {13,  8},
+   {12,  8}, {12,  8}, {12,  8}, {12,  8},
+   {12,  8}, {12,  8}, {12,  8}, {12,  8},
+   {11,  8}, {11,  8}, {11,  8}, {11,  8},
+   {11,  8}, {11,  8}, {11,  8}, {11,  8},
+   {10,  8}, {10,  8}, {10,  8}, {10,  8},
+   {10,  8}, {10,  8}, {10,  8}, {10,  8},
+   { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
+   { 9,  8}, { 9,  8}, { 9,  8}, { 9,  8},
+   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+   { 8,  7}, { 8,  7}, { 8,  7}, { 8,  7},
+   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
+   { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
+};
+
+/* original (non-patched) scan tables: entry i is the index in the 64-entry block where the i-th decoded coefficient is stored */
+static const uint8_t mpeg2_scan_norm_orig[64] =
+{
+   /* Zig-Zag scan pattern */
+    0, 1, 8,16, 9, 2, 3,10,
+   17,24,32,25,18,11, 4, 5,
+   12,19,26,33,40,48,41,34,
+   27,20,13, 6, 7,14,21,28,
+   35,42,49,56,57,50,43,36,
+   29,22,15,23,30,37,44,51,
+   58,59,52,45,38,31,39,46,
+   53,60,61,54,47,55,62,63
+};
+
+static const uint8_t mpeg2_scan_alt_orig[64] =
+{
+   /* Alternate scan pattern */
+   0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49,
+   41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43,
+   51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45,
+   53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63
+};
+/* filled by setup_scan_ptable() and all zero before it runs; the intra block decoders map a block position to a quantizer matrix index through these */
+static uint8_t mpeg2_scan_alt_ptable[64];
+static uint8_t mpeg2_scan_norm_ptable[64];
+static uint8_t mpeg2_scan_orig_ptable[64]; /* not read by the functions visible in this part of the file */
+
+static inline void /* fills the three *_ptable arrays; NOTE(review): no call site is visible in this part of the file - confirm it runs before any block is decoded */
+setup_scan_ptable( void )
+{
+   int i;
+   for (i=0; i<64; ++i) {
+      mpeg2_scan_norm_ptable[mpeg2_scan_norm_orig[i]] = mpeg2_scan_norm_orig[i]; /* index and value are the same entry, so each written slot maps to itself */
+      mpeg2_scan_alt_ptable[mpeg2_scan_alt_orig[i]] = mpeg2_scan_alt_orig[i]; /* likewise an identity mapping */
+      mpeg2_scan_orig_ptable[i] = i;
+   }
+}
+
+static const int non_linear_quantizer_scale[] = { /* 32 entries; get_quantizer_scale() indexes this with the 5-bit quantizer_scale_code when picture->q_scale_type is set */
+   0,  1,  2,  3,  4,  5,   6,   7,
+   8, 10, 12, 14, 16, 18,  20,  22,
+   24, 28, 32, 36, 40, 44,  48,  52,
+   56, 64, 72, 80, 88, 96, 104, 112
+};
+
+static inline int
+get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture)
+{
+   int macroblock_modes;
+   const MBtab * tab;
+   /* Decode the macroblock type for the current picture coding type; returns MACROBLOCK_* flags plus, where coded, a motion type (n * MOTION_TYPE_BASE) and DCT_TYPE_INTERLACED; 0 for an unknown picture type. */
+   switch (picture->picture_coding_type) {
+   case I_TYPE:
+      /* 1-bit table lookup; the dct type bit is only read for frame pictures without frame_pred_frame_dct */
+      tab = MB_I + vl_vlc_ubits(&bs->vlc, 1);
+      vl_vlc_dumpbits(&bs->vlc, tab->len);
+      macroblock_modes = tab->modes;
+
+      if ((!(picture->frame_pred_frame_dct)) && (picture->picture_structure == FRAME_PICTURE)) {
+         macroblock_modes |= vl_vlc_ubits(&bs->vlc, 1) * DCT_TYPE_INTERLACED;
+         vl_vlc_dumpbits(&bs->vlc, 1);
+      }
+
+      return macroblock_modes;
+
+   case P_TYPE:
+      /* 5-bit table lookup, then optional motion type / dct type depending on the picture structure */
+      tab = MB_P + vl_vlc_ubits(&bs->vlc, 5);
+      vl_vlc_dumpbits(&bs->vlc, tab->len);
+      macroblock_modes = tab->modes;
+
+      if (picture->picture_structure != FRAME_PICTURE) { /* field picture: motion type only */
+         if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
+            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
+            vl_vlc_dumpbits(&bs->vlc, 2);
+          }
+          return macroblock_modes;
+      } else if (picture->frame_pred_frame_dct) { /* nothing coded: MC_FRAME is implied for forward-predicted macroblocks */
+          if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
+            macroblock_modes |= MC_FRAME;
+          return macroblock_modes;
+      } else { /* frame picture with both motion type and dct type coded */
+          if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) {
+            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
+            vl_vlc_dumpbits(&bs->vlc, 2);
+          }
+          if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
+            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 1) * DCT_TYPE_INTERLACED;
+            vl_vlc_dumpbits(&bs->vlc, 1);
+          }
+          return macroblock_modes;
+      }
+
+   case B_TYPE:
+      /* 6-bit table lookup; same three sub-cases as P_TYPE but keyed on "not intra" instead of "forward motion" */
+      tab = MB_B + vl_vlc_ubits(&bs->vlc, 6);
+      vl_vlc_dumpbits(&bs->vlc, tab->len);
+      macroblock_modes = tab->modes;
+
+      if (picture->picture_structure != FRAME_PICTURE) {
+          if (! (macroblock_modes & MACROBLOCK_INTRA)) {
+            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
+            vl_vlc_dumpbits(&bs->vlc, 2);
+          }
+          return macroblock_modes;
+      } else if (picture->frame_pred_frame_dct) {
+          /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
+          macroblock_modes |= MC_FRAME; /* set unconditionally, intra macroblocks included (the check above is disabled) */
+          return macroblock_modes;
+      } else {
+          if (macroblock_modes & MACROBLOCK_INTRA)
+            goto intra; /* intra: no motion type is read, jump straight to the dct type bit */
+          macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
+          vl_vlc_dumpbits(&bs->vlc, 2);
+          if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
+          intra:
+            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 1) * DCT_TYPE_INTERLACED;
+            vl_vlc_dumpbits(&bs->vlc, 1);
+          }
+          return macroblock_modes;
+      }
+
+   case D_TYPE:
+      /* a single bit is dropped and the macroblock is always reported as intra */
+      vl_vlc_dumpbits(&bs->vlc, 1);
+      return MACROBLOCK_INTRA;
+
+   default:
+      return 0;
+   }
+}
+
+static inline int
+get_quantizer_scale(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture)
+{
+   int quantizer_scale_code;
+   /* Read the 5-bit quantizer_scale_code and map it to a scale: table lookup when picture->q_scale_type is set, otherwise code * 2. */
+   quantizer_scale_code = vl_vlc_ubits(&bs->vlc, 5);
+   vl_vlc_dumpbits(&bs->vlc, 5);
+
+   if (picture->q_scale_type)
+      return non_linear_quantizer_scale[quantizer_scale_code];
+   else
+      return quantizer_scale_code << 1;
+}
+
+static inline int
+get_motion_delta(struct vl_mpg12_bs *bs, unsigned f_code)
+{
+   int delta;
+   int sign;
+   const MVtab * tab;
+   /* Decode one signed motion vector delta: a table code, one sign bit, then f_code residual bits. A leading 1 bit means delta 0. */
+   if (bs->vlc.buf & 0x80000000) {
+      vl_vlc_dumpbits(&bs->vlc, 1);
+      return 0;
+   } else if (bs->vlc.buf >= 0x0c000000) {
+      /* short codes: bits/buf are updated by hand; all consumed bits (code + sign + residual) are added to bits in one step */
+      tab = MV_4 + vl_vlc_ubits(&bs->vlc, 4);
+      delta = (tab->delta << f_code) + 1;
+      bs->vlc.bits += tab->len + f_code + 1;
+      bs->vlc.buf <<= tab->len;
+
+      sign = vl_vlc_sbits(&bs->vlc, 1); /* presumably 0 or -1 (sign-extended top bit) - vl_vlc_sbits is defined in vl_vlc.h */
+      bs->vlc.buf <<= 1;
+
+      if (f_code)
+         delta += vl_vlc_ubits(&bs->vlc, f_code);
+      bs->vlc.buf <<= f_code;
+
+      return (delta ^ sign) - sign; /* negates delta when sign is -1, leaves it unchanged when sign is 0 */
+
+   } else {
+      /* long codes: the residual is read only after vl_vlc_needbits(), unlike the short-code branch */
+      tab = MV_10 + vl_vlc_ubits(&bs->vlc, 10);
+      delta = (tab->delta << f_code) + 1;
+      bs->vlc.bits += tab->len + 1;
+      bs->vlc.buf <<= tab->len;
+
+      sign = vl_vlc_sbits(&bs->vlc, 1);
+      bs->vlc.buf <<= 1;
+
+      if (f_code) {
+         vl_vlc_needbits(&bs->vlc);
+         delta += vl_vlc_ubits(&bs->vlc, f_code);
+         vl_vlc_dumpbits(&bs->vlc, f_code);
+      }
+
+      return (delta ^ sign) - sign;
+   }
+}
+
+static inline int /* wraps vec once into [-limit, limit) with limit = 16 << f_code; inputs more than 2 * limit out of range are not fully wrapped */
+bound_motion_vector(int vec, unsigned f_code)
+{
+#if 1
+   unsigned int limit;
+   int sign;
+
+   limit = 16 << f_code;
+
+   if ((unsigned int)(vec + limit) < 2 * limit) /* single unsigned compare for -limit <= vec < limit */
+      return vec;
+   else {
+      sign = ((int32_t)vec) >> 31; /* 0 for non-negative vec, -1 for negative (arithmetic shift assumed) */
+      return vec - ((2 * limit) ^ sign) + sign; /* vec - 2*limit when sign is 0, vec + 2*limit when sign is -1 */
+   }
+#else
+   return ((int32_t)vec << (28 - f_code)) >> (28 - f_code); /* disabled alternative: sign-extend the low (4 + f_code) bits */
+#endif
+}
+
+static inline int
+get_dmv(struct vl_mpg12_bs *bs)
+{
+   const DMVtab * tab;
+   /* Look up the next 2 bits in DMV_2, drop the code's len bits and return its dmv value (-1, 0 or 1). */
+   tab = DMV_2 + vl_vlc_ubits(&bs->vlc, 2);
+   vl_vlc_dumpbits(&bs->vlc, tab->len);
+   return tab->dmv;
+}
+
+static inline int
+get_coded_block_pattern(struct vl_mpg12_bs *bs)
+{
+   const CBPtab * tab;
+   /* Decode the coded block pattern: codes of up to 7 bits come from CBP_7, longer ones from CBP_9. */
+   vl_vlc_needbits(&bs->vlc);
+
+   if (bs->vlc.buf >= 0x20000000) {
+      /* top 7 bits are at least 16 here, hence the - 16 offset into CBP_7 */
+      tab = CBP_7 + (vl_vlc_ubits(&bs->vlc, 7) - 16);
+      vl_vlc_dumpbits(&bs->vlc, tab->len);
+      return tab->cbp;
+
+   } else {
+      /* top 9 bits are below 64 here, matching the 64 entries of CBP_9 */
+      tab = CBP_9 + vl_vlc_ubits(&bs->vlc, 9);
+      vl_vlc_dumpbits(&bs->vlc, tab->len);
+      return tab->cbp;
+   }
+}
+
+static inline int /* decodes one luma DC differential: a size code followed by `size` value bits; returns the signed differential */
+get_luma_dc_dct_diff(struct vl_mpg12_bs *bs)
+{
+   const DCtab * tab;
+   int size;
+   int dc_diff;
+   /* size codes of up to 5 bits use DC_lum_5; the all-ones 5-bit prefix selects DC_long */
+   if (bs->vlc.buf < 0xf8000000) {
+      tab = DC_lum_5 + vl_vlc_ubits(&bs->vlc, 5);
+      size = tab->size;
+      if (size) {
+         bs->vlc.bits += tab->len + size; /* accounts for the size code and the value bits in one step */
+         bs->vlc.buf <<= tab->len;
+         dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); /* a leading 0 bit marks a negative value: subtract 2^size - 1 */
+         bs->vlc.buf <<= size;
+         return dc_diff;
+      } else {
+         vl_vlc_dumpbits(&bs->vlc, 3); /* the size-0 luma code is 3 bits long (see DC_lum_5) */
+         return 0;
+      }
+   } else {
+      tab = DC_long + (vl_vlc_ubits(&bs->vlc, 9) - 0x1e0); /* top 9 bits are at least 0x1f0 here, giving index 16..31 */
+      size = tab->size;
+      vl_vlc_dumpbits(&bs->vlc, tab->len);
+      vl_vlc_needbits(&bs->vlc);
+      dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size);
+      vl_vlc_dumpbits(&bs->vlc, size);
+      return dc_diff;
+   }
+}
+
+static inline int /* decodes one chroma DC differential: a size code followed by `size` value bits; returns the signed differential */
+get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
+{
+   const DCtab * tab;
+   int size;
+   int dc_diff;
+   /* size codes of up to 5 bits use DC_chrom_5; the all-ones 5-bit prefix selects DC_long */
+   if (bs->vlc.buf < 0xf8000000) {
+      tab = DC_chrom_5 + vl_vlc_ubits(&bs->vlc, 5);
+      size = tab->size;
+      if (size) {
+         bs->vlc.bits += tab->len + size; /* accounts for the size code and the value bits in one step */
+         bs->vlc.buf <<= tab->len;
+         dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size); /* a leading 0 bit marks a negative value: subtract 2^size - 1 */
+         bs->vlc.buf <<= size;
+         return dc_diff;
+      } else {
+         vl_vlc_dumpbits(&bs->vlc, 2); /* the size-0 chroma code is 2 bits long (see DC_chrom_5) */
+         return 0;
+      }
+   } else {
+      tab = DC_long + (vl_vlc_ubits(&bs->vlc, 10) - 0x3e0); /* chroma indexes the shared table with 10 bits, so the code is one bit longer than the stored len */
+      size = tab->size;
+      vl_vlc_dumpbits(&bs->vlc, tab->len + 1);
+      vl_vlc_needbits(&bs->vlc);
+      dc_diff = vl_vlc_ubits(&bs->vlc, size) - UBITS (SBITS (~bs->vlc.buf, 1), size);
+      vl_vlc_dumpbits(&bs->vlc, size);
+      return dc_diff;
+   }
+}
+
+static inline void /* decodes the AC coefficients of one intra block with the B14 tables into dest (64 shorts; dest[0] must already hold the DC value) */
+get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
+{
+   int i, j, l, val;
+   const uint8_t *scan;
+   uint8_t *scan_ptable;
+   uint8_t *quant_matrix = picture->intra_quantizer_matrix;
+   int quantizer_scale = picture->quantizer_scale;
+   int mismatch;
+   const DCTtab *tab;
+   /* i: position in coding order, j = scan[i]: index into dest, l = scan_ptable[j]: index into the quantizer matrix */
+   if (!picture->alternate_scan) {
+      scan =  mpeg2_scan_norm_orig;
+      scan_ptable = mpeg2_scan_norm_ptable;
+   } else {
+      scan = mpeg2_scan_alt_orig;
+      scan_ptable = mpeg2_scan_alt_ptable;
+   }
+
+   i = 0;
+   mismatch = ~dest[0]; /* XOR accumulator over all coefficients; only its low bit is used at the end */
+
+   vl_vlc_needbits(&bs->vlc);
+   /* each iteration decodes one (run, level) pair; the table is chosen by the magnitude of the bit buffer, i.e. by the code's leading zeros */
+   while (1) {
+      if (bs->vlc.buf >= 0x28000000) {
+
+         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+
+         i += tab->run;
+         if (i >= 64)
+            break;	/* end of block */
+
+      normal_code:
+         l = scan_ptable[j = scan[i]];
+
+         bs->vlc.buf <<= tab->len;
+         bs->vlc.bits += tab->len + 1; /* + 1 for the sign bit shifted out below */
+         val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4;
+
+         /* if (bitstream_get (1)) val = -val; */
+         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
+
+         SATURATE (val);
+         dest[j] = val;
+         mismatch ^= val;
+
+         bs->vlc.buf <<= 1;
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x04000000) {
+
+         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
+
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+
+         /* escape code: the table entry added run 65, so this nets to i += (6-bit run) + 1 */
+
+         i += UBITS(bs->vlc.buf << 6, 6) - 64;
+         if (i >= 64)
+            break;	/* illegal, check needed to avoid buffer overflow */
+
+         l = scan_ptable[j = scan[i]];
+
+         vl_vlc_dumpbits(&bs->vlc, 12); /* 6-bit escape code + 6-bit run */
+         vl_vlc_needbits(&bs->vlc);
+         val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[l]) / 16; /* 12-bit signed level */
+
+         SATURATE (val);
+         dest[j] = val;
+         mismatch ^= val;
+
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x02000000) {
+         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00800000) {
+         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00200000) {
+         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else {
+         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
+         bs->vlc.buf <<= 16; /* 16-bit codes are shifted out here, which is why DCT_16 entries carry len 0 */
+         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      }
+      break;	/* illegal, check needed to avoid buffer overflow */
+   }
+
+   dest[63] ^= mismatch & 1; /* toggle the LSB of the last coefficient according to the accumulated mismatch bit */
+   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
+}
+
+static inline void /* decodes the AC coefficients of one intra block with the B15 tables into dest (64 shorts; dest[0] must already hold the DC value) */
+get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
+{
+   int i, j, l, val;
+   const uint8_t *scan;
+   uint8_t *scan_ptable;
+   uint8_t *quant_matrix = picture->intra_quantizer_matrix;
+   int quantizer_scale = picture->quantizer_scale;
+   int mismatch;
+   const DCTtab * tab;
+   /* i: position in coding order, j = scan[i]: index into dest, l = scan_ptable[j]: index into the quantizer matrix */
+   if (!picture->alternate_scan) {
+      scan =  mpeg2_scan_norm_orig;
+      scan_ptable = mpeg2_scan_norm_ptable;
+   } else {
+      scan = mpeg2_scan_alt_orig;
+      scan_ptable = mpeg2_scan_alt_ptable;
+   }
+
+   i = 0;
+   mismatch = ~dest[0]; /* XOR accumulator over all coefficients; only its low bit is used at the end */
+
+   vl_vlc_needbits(&bs->vlc);
+   /* each iteration decodes one (run, level) pair; the table is chosen by the magnitude of the bit buffer */
+   while (1) {
+      if (bs->vlc.buf >= 0x04000000) {
+
+         tab = DCT_B15_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
+
+         i += tab->run;
+         if (i < 64) {
+
+         normal_code:
+            l = scan_ptable[j = scan[i]];
+            bs->vlc.buf <<= tab->len;
+            bs->vlc.bits += tab->len + 1; /* + 1 for the sign bit shifted out below */
+            val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4;
+
+            /* if (bitstream_get (1)) val = -val; */
+            val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
+
+            SATURATE (val);
+            dest[j] = val;
+            mismatch ^= val;
+
+            bs->vlc.buf <<= 1;
+            vl_vlc_needbits(&bs->vlc);
+
+            continue;
+
+         } else {
+
+            /* end of block. I commented out this code because if we */
+            /* don't exit here we will still exit at the later test :) */
+
+            /* if (i >= 128) break;	*/	/* end of block */
+
+            /* escape code: the table entry added run 65, so this nets to i += (6-bit run) + 1; run 129 (end of block) always fails the check below */
+
+            i += UBITS(bs->vlc.buf << 6, 6) - 64;
+            if (i >= 64)
+                break;	/* illegal, check against buffer overflow */
+
+            l = scan_ptable[j = scan[i]];
+
+            vl_vlc_dumpbits(&bs->vlc, 12); /* 6-bit escape code + 6-bit run */
+            vl_vlc_needbits(&bs->vlc);
+            val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[l]) / 16; /* 12-bit signed level */
+
+            SATURATE (val);
+            dest[j] = val;
+            mismatch ^= val;
+
+            vl_vlc_dumpbits(&bs->vlc, 12);
+            vl_vlc_needbits(&bs->vlc);
+
+            continue;
+
+          }
+      } else if (bs->vlc.buf >= 0x02000000) {
+         tab = DCT_B15_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00800000) {
+         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00200000) {
+         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else {
+         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
+         bs->vlc.buf <<= 16; /* 16-bit codes are shifted out here, which is why DCT_16 entries carry len 0 */
+         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      }
+      break;	/* illegal, check needed to avoid buffer overflow */
+   }
+
+   dest[63] ^= mismatch & 1; /* toggle the LSB of the last coefficient according to the accumulated mismatch bit */
+   vl_vlc_dumpbits(&bs->vlc, 4);	/* dump end of block code */
+}
+/* Parse one non-intra block with the DCT_B14* run/level tables and write the dequantized coefficients to dest[]; slice_non_intra_DCT calls this when picture->mpeg1 is not set. */
+static inline void
+get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
+{
+   int i, j, l, val; /* i: position in scan order, j: index into dest, l: index into quant_matrix */
+   const uint8_t *scan;
+   uint8_t *scan_ptable;
+   uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
+   int quantizer_scale = picture->quantizer_scale;
+   int mismatch; /* xor of all stored values; only its low bit is used at the end */
+   const DCTtab *tab;
+   /* i starts at -1, one position before the first coefficient; the first decoded run moves it forward */
+   i = -1;
+   mismatch = 1;
+   /* select the scan table (i -> j) and its companion table (j -> l) */
+   if (!picture->alternate_scan) {
+      scan =  mpeg2_scan_norm_orig;
+      scan_ptable = mpeg2_scan_norm_ptable;
+   } else {
+      scan = mpeg2_scan_alt_orig;
+      scan_ptable = mpeg2_scan_alt_ptable;
+   }
+   /* the first code is looked up in DCT_B14DC_5 rather than DCT_B14AC_5, hence the jumps into the loop body */
+   vl_vlc_needbits(&bs->vlc);
+   if (bs->vlc.buf >= 0x28000000) {
+      tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+      goto entry_1;
+   } else
+      goto entry_2;
+   /* the loop is only ever entered through the entry_1 / entry_2 labels */
+   while (1) {
+      if (bs->vlc.buf >= 0x28000000) {
+
+         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+
+      entry_1:
+         i += tab->run;
+         if (i >= 64)
+            break;	/* end of block */
+         /* shared tail: the other table branches jump here with tab and i already set */
+      normal_code:
+         l = scan_ptable[j = scan[i]];
+         bs->vlc.buf <<= tab->len;
+         bs->vlc.bits += tab->len + 1; /* NOTE(review): the extra 1 presumably pre-counts the sign bit shifted out below */
+         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[l]) >> 5;
+         /* (v ^ s) - s negates v when s is all ones and leaves it unchanged when s is 0 */
+         /* if (bitstream_get (1)) val = -val; */
+         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
+         /* SATURATE is defined outside this function -- presumably it clamps val to the coefficient range */
+         SATURATE (val);
+         dest[j] = val;
+         mismatch ^= val;
+
+         bs->vlc.buf <<= 1;
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      }
+
+   entry_2:
+      if (bs->vlc.buf >= 0x04000000) {
+
+         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
+
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+
+         /* escape code */
+         /* NOTE(review): the run is read from the 6 bits behind a 6-bit prefix; -64 presumably cancels the oversized run of the table entry -- confirm against DCT_B14_8 */
+         i += UBITS(bs->vlc.buf << 6, 6) - 64;
+         if (i >= 64)
+            break;	/* illegal, check needed to avoid buffer overflow */
+
+         l = scan_ptable[j = scan[i]];
+         /* skip the 12 bits examined so far, then read the level as a signed 12-bit field */
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+         val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1; /* 2*level+1, or 2*level-1 when the 1-bit read yields -1 */
+         val = (val * quantizer_scale * quant_matrix[l]) / 32;
+
+         SATURATE (val);
+         dest[j] = val;
+         mismatch ^= val;
+         /* discard the 12 level bits */
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x02000000) { /* table indexed with the top 10 bits */
+         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00800000) { /* table indexed with the top 13 bits */
+         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00200000) { /* table indexed with the top 15 bits */
+         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else { /* table indexed with the top 16 bits */
+         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
+         bs->vlc.buf <<= 16; /* NOTE(review): 16 bits are shifted out up front; tab->len is presumably relative to what remains */
+         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      }
+      break;	/* illegal, check needed to avoid buffer overflow */
+   }
+   dest[63] ^= mismatch & 1; /* toggle the low bit of the last coefficient when the low bit of mismatch is set */
+   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
+}
+/* Parse one intra block for the picture->mpeg1 case (see slice_intra_DCT): DCT_B14* tables, 8/16-bit escape levels and values forced odd; no mismatch accumulator is kept here. */
+static inline void
+get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
+{
+   int i, j, l, val; /* i: position in scan order, j: index into dest, l: index into quant_matrix */
+   const uint8_t *scan;
+   uint8_t *scan_ptable;
+   uint8_t *quant_matrix = picture->intra_quantizer_matrix;
+   int quantizer_scale = picture->quantizer_scale;
+   const DCTtab * tab;
+
+   i = 0;
+   /* select the scan table (i -> j) and its companion table (j -> l) */
+   if (!picture->alternate_scan) {
+      scan =  mpeg2_scan_norm_orig;
+      scan_ptable = mpeg2_scan_norm_ptable;
+   } else {
+      scan = mpeg2_scan_alt_orig;
+      scan_ptable = mpeg2_scan_alt_ptable;
+   }
+
+   vl_vlc_needbits(&bs->vlc);
+   /* one code per iteration; the magnitude of vlc.buf decides which table is consulted */
+   while (1) {
+      if (bs->vlc.buf >= 0x28000000) {
+
+         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+
+         i += tab->run;
+         if (i >= 64)
+            break;	/* end of block */
+         /* shared tail: the other table branches jump here with tab and i already set */
+      normal_code:
+         l = scan_ptable[j = scan[i]];
+         bs->vlc.buf <<= tab->len;
+         bs->vlc.bits += tab->len + 1; /* NOTE(review): the extra 1 presumably pre-counts the sign bit shifted out below */
+         val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4;
+         /* (val - 1) | 1 keeps odd values and turns an even value into the odd value just below it */
+         /* oddification */
+         val = (val - 1) | 1;
+         /* (v ^ s) - s negates v when s is all ones and leaves it unchanged when s is 0 */
+         /* if (bitstream_get (1)) val = -val; */
+         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
+         /* SATURATE is defined outside this function -- presumably it clamps val to the coefficient range */
+         SATURATE (val);
+         dest[j] = val;
+
+         bs->vlc.buf <<= 1;
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x04000000) {
+
+         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
+
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+
+         /* escape code */
+         /* NOTE(review): the run is read from the 6 bits behind a 6-bit prefix; -64 presumably cancels the oversized run of the table entry -- confirm against DCT_B14_8 */
+         i += UBITS(bs->vlc.buf << 6, 6) - 64;
+         if (i >= 64)
+            break;	/* illegal, check needed to avoid buffer overflow */
+
+         l = scan_ptable[j = scan[i]];
+         /* skip the 12 bits examined so far; the level is 8 bits, extended by a second byte when its low 7 bits are zero */
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+         val = vl_vlc_sbits(&bs->vlc, 8);
+         if (! (val & 0x7f)) {
+            vl_vlc_dumpbits(&bs->vlc, 8);
+            val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; /* second byte plus twice the first value */
+         }
+         val = (val * quantizer_scale * quant_matrix[l]) / 16;
+         /* NOTE(review): SBITS(val, 1) presumably yields the sign of val (0 or -1), so 1 is subtracted from non-negative values only before the low bit is forced */
+         /* oddification */
+         val = (val + ~SBITS (val, 1)) | 1;
+
+         SATURATE (val);
+         dest[j] = val;
+         /* discard the last 8 level bits */
+         vl_vlc_dumpbits(&bs->vlc, 8);
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x02000000) { /* table indexed with the top 10 bits */
+         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00800000) { /* table indexed with the top 13 bits */
+         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00200000) { /* table indexed with the top 15 bits */
+         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else { /* table indexed with the top 16 bits */
+         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
+         bs->vlc.buf <<= 16; /* NOTE(review): 16 bits are shifted out up front; tab->len is presumably relative to what remains */
+         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      }
+      break;	/* illegal, check needed to avoid buffer overflow */
+   }
+   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
+}
+/* Parse one non-intra block for the picture->mpeg1 case (see slice_non_intra_DCT): DCT_B14* tables, 8/16-bit escape levels and values forced odd; no mismatch accumulator is kept here. */
+static inline void
+get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
+{
+   int i, j, l, val; /* i: position in scan order, j: index into dest, l: index into quant_matrix */
+   const uint8_t * scan;
+   uint8_t *scan_ptable;
+   uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
+   int quantizer_scale = picture->quantizer_scale;
+   const DCTtab * tab;
+   /* i starts at -1, one position before the first coefficient; the first decoded run moves it forward */
+   i = -1;
+   /* select the scan table (i -> j) and its companion table (j -> l) */
+   if (!picture->alternate_scan) {
+      scan =  mpeg2_scan_norm_orig;
+      scan_ptable = mpeg2_scan_norm_ptable;
+   } else {
+      scan = mpeg2_scan_alt_orig;
+      scan_ptable = mpeg2_scan_alt_ptable;
+   }
+   /* the first code is looked up in DCT_B14DC_5 rather than DCT_B14AC_5, hence the jumps into the loop body */
+   vl_vlc_needbits(&bs->vlc);
+   if (bs->vlc.buf >= 0x28000000) {
+      tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+      goto entry_1;
+   } else
+      goto entry_2;
+   /* the loop is only ever entered through the entry_1 / entry_2 labels */
+   while (1) {
+      if (bs->vlc.buf >= 0x28000000) {
+
+         tab = DCT_B14AC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
+
+      entry_1:
+         i += tab->run;
+         if (i >= 64)
+            break;	/* end of block */
+         /* shared tail: the other table branches jump here with tab and i already set */
+      normal_code:
+         l = scan_ptable[j = scan[i]];
+         bs->vlc.buf <<= tab->len;
+         bs->vlc.bits += tab->len + 1; /* NOTE(review): the extra 1 presumably pre-counts the sign bit shifted out below */
+         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[l]) >> 5;
+         /* (val - 1) | 1 keeps odd values and turns an even value into the odd value just below it */
+         /* oddification */
+         val = (val - 1) | 1;
+         /* (v ^ s) - s negates v when s is all ones and leaves it unchanged when s is 0 */
+         /* if (bitstream_get (1)) val = -val; */
+         val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
+         /* SATURATE is defined outside this function -- presumably it clamps val to the coefficient range */
+         SATURATE (val);
+         dest[j] = val;
+
+         bs->vlc.buf <<= 1;
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      }
+
+   entry_2:
+      if (bs->vlc.buf >= 0x04000000) {
+
+         tab = DCT_B14_8 + (vl_vlc_ubits(&bs->vlc, 8) - 4);
+
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+
+         /* escape code */
+         /* NOTE(review): the run is read from the 6 bits behind a 6-bit prefix; -64 presumably cancels the oversized run of the table entry -- confirm against DCT_B14_8 */
+         i += UBITS(bs->vlc.buf << 6, 6) - 64;
+         if (i >= 64)
+            break;	/* illegal, check needed to avoid buffer overflow */
+
+         l = scan_ptable[j = scan[i]];
+         /* skip the 12 bits examined so far; the level is 8 bits, extended by a second byte when its low 7 bits are zero */
+         vl_vlc_dumpbits(&bs->vlc, 12);
+         vl_vlc_needbits(&bs->vlc);
+         val = vl_vlc_sbits(&bs->vlc, 8);
+         if (! (val & 0x7f)) {
+            vl_vlc_dumpbits(&bs->vlc, 8);
+            val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; /* second byte plus twice the first value */
+         }
+         val = 2 * (val + SBITS (val, 1)) + 1; /* NOTE(review): 2*level+1, or 2*level-1 if SBITS(val, 1) is -1 for negative val -- confirm */
+         val = (val * quantizer_scale * quant_matrix[l]) / 32;
+         /* NOTE(review): SBITS(val, 1) presumably yields the sign of val (0 or -1), so 1 is subtracted from non-negative values only before the low bit is forced */
+         /* oddification */
+         val = (val + ~SBITS (val, 1)) | 1;
+
+         SATURATE (val);
+         dest[j] = val;
+         /* discard the last 8 level bits */
+         vl_vlc_dumpbits(&bs->vlc, 8);
+         vl_vlc_needbits(&bs->vlc);
+
+         continue;
+
+      } else if (bs->vlc.buf >= 0x02000000) { /* table indexed with the top 10 bits */
+         tab = DCT_B14_10 + (vl_vlc_ubits(&bs->vlc, 10) - 8);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00800000) { /* table indexed with the top 13 bits */
+         tab = DCT_13 + (vl_vlc_ubits(&bs->vlc, 13) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else if (bs->vlc.buf >= 0x00200000) { /* table indexed with the top 15 bits */
+         tab = DCT_15 + (vl_vlc_ubits(&bs->vlc, 15) - 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      } else { /* table indexed with the top 16 bits */
+         tab = DCT_16 + vl_vlc_ubits(&bs->vlc, 16);
+         bs->vlc.buf <<= 16; /* NOTE(review): 16 bits are shifted out up front; tab->len is presumably relative to what remains */
+         vl_vlc_getword(&bs->vlc, bs->vlc.bits + 16);
+         i += tab->run;
+         if (i < 64)
+            goto normal_code;
+      }
+      break;	/* illegal, check needed to avoid buffer overflow */
+   }
+   vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
+}
+
+/* Decode one intra block of plane cc: describe it in the plane's block stream,
+ * rebuild the DC value from the running predictor and parse the other values. */
+static inline void
+slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
+                unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding)
+{
+   struct pipe_ycbcr_block *blk = bs->ycbcr_stream[cc];
+   short *coeffs = bs->ycbcr_buffer[cc];
+
+   blk->x = x;
+   blk->y = y;
+   blk->intra = PIPE_MPEG12_DCT_INTRA;
+   blk->coding = coding;
+   vl_vlc_needbits(&bs->vlc); /* the DC difference is read next: luma for plane 0, chroma otherwise */
+   if (cc != 0)
+      picture->dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs);
+   else
+      picture->dc_dct_pred[0] += get_luma_dc_dct_diff(bs);
+
+   memset(coeffs, 0, 64 * sizeof(int16_t));
+   coeffs[0] = picture->dc_dct_pred[cc] << (3 - picture->intra_dc_precision); /* get_intra_block_B15 reads this back */
+   if (!picture->mpeg1) {
+      if (picture->intra_vlc_format)
+         get_intra_block_B15(bs, picture, coeffs);
+      else
+         get_intra_block_B14(bs, picture, coeffs);
+   } else if (picture->picture_coding_type != D_TYPE)
+      get_mpeg1_intra_block(bs, picture, coeffs);
+
+   ++bs->num_ycbcr_blocks[cc];
+   ++bs->ycbcr_stream[cc];
+   bs->ycbcr_buffer[cc] += 64;
+}
+
+/* Decode one non-intra (delta) block of plane cc into the plane's next 64-value slot and describe it in the block stream. */
+static inline void
+slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
+                    unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding)
+{
+   struct pipe_ycbcr_block *blk = bs->ycbcr_stream[cc];
+   short *coeffs = bs->ycbcr_buffer[cc];
+
+   blk->x = x;
+   blk->y = y;
+   blk->intra = PIPE_MPEG12_DCT_DELTA;
+   blk->coding = coding;
+   memset(coeffs, 0, 64 * sizeof(int16_t));
+   if (!picture->mpeg1)
+      get_non_intra_block(bs, picture, coeffs);
+   else
+      get_mpeg1_non_intra_block(bs, picture, coeffs);
+   ++bs->num_ycbcr_blocks[cc];
+   ++bs->ycbcr_stream[cc];
+   bs->ycbcr_buffer[cc] += 64;
+}
+
+/* Vector parser used for picture->mpeg1: both deltas are read with f_code[0]
+ * and shifted left by f_code[1]; top and bottom receive the same result. */
+static inline void
+motion_mp1(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   const unsigned range = f_code[0] + f_code[1];
+   int vec;
+
+   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.x + (get_motion_delta(bs, f_code[0]) << f_code[1]);
+   mv->top.x = mv->bottom.x = bound_motion_vector(vec, range);
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.y + (get_motion_delta(bs, f_code[0]) << f_code[1]);
+   mv->top.y = mv->bottom.y = bound_motion_vector(vec, range);
+}
+
+/* MC_FRAME in a frame picture: one vector shared by top and bottom;
+ * x is coded with f_code[0], y with f_code[1]. */
+static inline void
+motion_fr_frame(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec;
+
+   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.x + get_motion_delta(bs, f_code[0]);
+   mv->top.x = mv->bottom.x = bound_motion_vector(vec, f_code[0]);
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.y + get_motion_delta(bs, f_code[1]);
+   mv->top.y = mv->bottom.y = bound_motion_vector(vec, f_code[1]);
+}
+
+/* MC_FIELD in a frame picture: top and bottom are each coded as a select bit, an x delta and
+ * a y delta.  y is predicted from half the stored value and stored doubled, without bounding. */
+static inline void
+motion_fr_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec;
+
+   /* top vector */
+   vl_vlc_needbits(&bs->vlc);
+   if (vl_vlc_ubits(&bs->vlc, 1))
+      mv->top.field_select = PIPE_VIDEO_BOTTOM_FIELD;
+   else
+      mv->top.field_select = PIPE_VIDEO_TOP_FIELD;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+   vec = mv->top.x + get_motion_delta(bs, f_code[0]);
+   mv->top.x = bound_motion_vector(vec, f_code[0]);
+   vl_vlc_needbits(&bs->vlc);
+   vec = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]);
+   mv->top.y = vec << 1;
+
+   /* bottom vector */
+   vl_vlc_needbits(&bs->vlc);
+   if (vl_vlc_ubits(&bs->vlc, 1))
+      mv->bottom.field_select = PIPE_VIDEO_BOTTOM_FIELD;
+   else
+      mv->bottom.field_select = PIPE_VIDEO_TOP_FIELD;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+   vec = mv->bottom.x + get_motion_delta(bs, f_code[0]);
+   mv->bottom.x = bound_motion_vector(vec, f_code[0]);
+   vl_vlc_needbits(&bs->vlc);
+   vec = (mv->bottom.y >> 1) + get_motion_delta(bs, f_code[1]);
+   mv->bottom.y = vec << 1;
+}
+
+/* MC_DMV in a frame picture: a single vector is parsed and stored for top and
+ * bottom; the TODO below marks the DMV handling itself as unfinished. */
+static inline void
+motion_fr_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec;
+
+   // TODO Implement dmv
+   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.x + get_motion_delta(bs, f_code[0]);
+   mv->top.x = mv->bottom.x = bound_motion_vector(vec, f_code[0]);
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = (mv->top.y >> 1) + get_motion_delta(bs, f_code[1]);
+   mv->top.y = mv->bottom.y = vec << 1; /* y is stored doubled and not bounded */
+}
+
+/* Concealment vector of an intra macroblock in a frame picture: parsed the
+ * same way as motion_fr_frame, then one trailing marker bit is skipped. */
+static inline void
+motion_fr_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec;
+
+   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.x + get_motion_delta(bs, f_code[0]);
+   mv->top.x = mv->bottom.x = bound_motion_vector(vec, f_code[0]);
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.y + get_motion_delta(bs, f_code[1]);
+   mv->top.y = mv->bottom.y = bound_motion_vector(vec, f_code[1]);
+
+   /* remove marker_bit */
+   vl_vlc_dumpbits(&bs->vlc, 1);
+}
+
+/* MC_FIELD in a field picture: one leading bit (ref_field) is skipped
+ * without being evaluated, then a single vector is parsed and stored for
+ * both top and bottom.  field_select is always set to PIPE_VIDEO_FRAME;
+ * see the TODO below. */
+static inline void
+motion_fi_field(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec;
+
+   vl_vlc_needbits(&bs->vlc);
+
+   // TODO field select may need to do something here for bob (weave ok)
+   /* ref_field: the bit is dropped without being read */
+   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+
+   vec = mv->top.x + get_motion_delta(bs, f_code[0]);
+   mv->top.x = mv->bottom.x = bound_motion_vector(vec, f_code[0]);
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.y + get_motion_delta(bs, f_code[1]);
+   mv->top.y = mv->bottom.y = bound_motion_vector(vec, f_code[1]);
+}
+
+/* MC_16X8 in a field picture: two vectors are coded back to back; the
+ * first one updates mv->top, the second one mv->bottom.  Each vector is
+ * preceded by one bit (ref_field) that is skipped without being
+ * evaluated; field_select is always set to PIPE_VIDEO_FRAME, see the
+ * TODOs below. */
+static inline void
+motion_fi_16x8(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec;
+
+   /* first vector -> mv->top */
+   vl_vlc_needbits(&bs->vlc);
+   // TODO field select may need to do something here bob  (weave ok)
+   /* ref_field: the bit is dropped without being read */
+   mv->top.field_select = PIPE_VIDEO_FRAME;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+
+   /* x is bounded with f_code[0], y with f_code[1] */
+   vec = mv->top.x + get_motion_delta(bs, f_code[0]);
+   mv->top.x = bound_motion_vector(vec, f_code[0]);
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.y + get_motion_delta(bs, f_code[1]);
+   mv->top.y = bound_motion_vector(vec, f_code[1]);
+
+   /* second vector -> mv->bottom */
+   vl_vlc_needbits(&bs->vlc);
+   // TODO field select may need to do something here for bob (weave ok)
+   /* ref_field: the bit is dropped without being read */
+   mv->bottom.field_select = PIPE_VIDEO_FRAME;
+   vl_vlc_dumpbits(&bs->vlc, 1);
+
+   /* x is bounded with f_code[0], y with f_code[1] */
+   vec = mv->bottom.x + get_motion_delta(bs, f_code[0]);
+   mv->bottom.x = bound_motion_vector(vec, f_code[0]);
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->bottom.y + get_motion_delta(bs, f_code[1]);
+   mv->bottom.y = bound_motion_vector(vec, f_code[1]);
+}
+
+/* MC_DMV in a field picture: only a single vector is parsed here and it is
+ * stored for both top and bottom. */
+static inline void
+motion_fi_dmv(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec;
+
+   // TODO field select may need to do something here for bob  (weave ok)
+   mv->top.field_select = mv->bottom.field_select = PIPE_VIDEO_FRAME;
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.x + get_motion_delta(bs, f_code[0]);
+   mv->top.x = mv->bottom.x = bound_motion_vector(vec, f_code[0]);
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.y + get_motion_delta(bs, f_code[1]);
+   mv->top.y = mv->bottom.y = bound_motion_vector(vec, f_code[1]);
+}
+
+
+/* Concealment vector of an intra macroblock in a field picture: a skipped field_select
+ * bit, one vector for top and bottom, a skipped marker bit.  mv's field_select is not written. */
+static inline void
+motion_fi_conceal(struct vl_mpg12_bs *bs, unsigned f_code[2], struct pipe_motionvector *mv)
+{
+   int vec;
+
+   vl_vlc_needbits(&bs->vlc);
+   vl_vlc_dumpbits(&bs->vlc, 1); /* remove field_select */
+
+   vec = mv->top.x + get_motion_delta(bs, f_code[0]);
+   mv->top.x = mv->bottom.x = bound_motion_vector(vec, f_code[0]);
+
+   vl_vlc_needbits(&bs->vlc);
+   vec = mv->top.y + get_motion_delta(bs, f_code[1]);
+   mv->top.y = mv->bottom.y = bound_motion_vector(vec, f_code[1]);
+
+   vl_vlc_dumpbits(&bs->vlc, 1); /* remove marker_bit */
+}
+/* Expands inside decode_slice and uses its locals bs, picture, mv_fwd and mv_bwd: runs routine for each prediction direction whose flag is set in macroblock_modes. */
+#define MOTION_CALL(routine, macroblock_modes)		\
+do {							\
+   if ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD)  \
+      routine(bs, picture->f_code[0], &mv_fwd);         \
+   if ((macroblock_modes) & MACROBLOCK_MOTION_BACKWARD)	\
+      routine(bs, picture->f_code[1], &mv_bwd);         \
+} while (0)
+/* Expands inside decode_slice (x, y in macroblock units): stores mv_fwd/mv_bwd for the current macroblock, then advances; makes the enclosing function return false once y reaches height/16. */
+#define NEXT_MACROBLOCK		                \
+do {				                \
+   bs->mv_stream[0][x+y*bs->width/16] = mv_fwd; \
+   bs->mv_stream[1][x+y*bs->width/16] = mv_bwd; \
+   ++x;				                \
+   if (x == bs->width/16) {	                \
+      ++y;                                      \
+      if (y >= bs->height/16)                   \
+         return false;                          \
+      x = 0;                                    \
+   }                                            \
+} while (0)
+
+/* Skip ahead to the next start code in the range 0x101..0x1AF and prepare decoding of that
+ * slice.  On success *x and *y hold the pixel position of the slice's first macroblock.
+ * Returns false if no such start code is left or the slice starts outside the picture. */
+static inline bool
+slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int *x, int *y)
+{
+   const MBAtab * mba;
+
+   vl_vlc_need32bits(&bs->vlc);
+   while(bs->vlc.buf < 0x101 || bs->vlc.buf > 0x1AF) {
+      if(!vl_vlc_getbyte(&bs->vlc))
+         return false;
+   }
+   *y = ((bs->vlc.buf & 0xFF) - 1) * 16; /* low byte of the start code is the 1-based row */
+   vl_vlc_restart(&bs->vlc);
+
+   //TODO conversion to signed format signed format
+   picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = picture->dc_dct_pred[2] = 0;
+   picture->quantizer_scale = get_quantizer_scale(bs, picture);
+
+   /* ignore intra_slice and all the extra data */
+   while (bs->vlc.buf & 0x80000000) {
+      vl_vlc_dumpbits(&bs->vlc, 9);
+      vl_vlc_needbits(&bs->vlc);
+   }
+
+   /* decode initial macroblock address increment */
+   *x = 0;
+   while (1) {
+      if (bs->vlc.buf >= 0x08000000) {
+          mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 6) - 2);
+          break;
+      } else if (bs->vlc.buf >= 0x01800000) {
+          mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 12) - 24);
+          break;
+      } else switch (vl_vlc_ubits(&bs->vlc, 12)) {
+      case 8:		/* macroblock_escape */
+          *x += 33;
+          vl_vlc_dumpbits(&bs->vlc, 11);
+          vl_vlc_needbits(&bs->vlc);
+          continue;
+      case 15:	/* macroblock_stuffing (MPEG1 only) */
+          bs->vlc.buf &= 0xfffff;
+          vl_vlc_dumpbits(&bs->vlc, 11);
+          vl_vlc_needbits(&bs->vlc);
+          continue;
+      default:	/* error */
+          return false;
+      }
+   }
+   vl_vlc_dumpbits(&bs->vlc, mba->len + 1);
+   *x = (*x + mba->mba) << 4;
+   while (*x >= bs->width) { /* wrap a position past the right edge onto the following rows */
+      *x -= bs->width;
+      *y += 16;
+   }
+   /* NEXT_MACROBLOCK stores a vector before it checks the row, so a slice that
+    * starts at or beyond macroblock row height/16 has to be rejected here */
+   return (unsigned)*y / 16 < bs->height / 16;
+}
+/* Decode one slice: parse its macroblocks, store their motion vectors in bs->mv_stream and their blocks via slice_*_DCT. Returns false if slice_init fails or NEXT_MACROBLOCK runs past the last row, true when the slice ends. */
+static inline bool
+decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
+{
+   struct pipe_motionvector mv_fwd, mv_bwd;
+   enum pipe_mpeg12_dct_type dct_type;
+   int x, y;
+   /* slice_init reports the start position in pixels */
+   if (!slice_init(bs, picture, &x, &y))
+      return false;
+   /* every slice starts with zero vectors and frame select */
+   mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
+   mv_fwd.top.field_select = mv_fwd.bottom.field_select = PIPE_VIDEO_FRAME;
+
+   mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
+   mv_bwd.top.field_select = mv_bwd.bottom.field_select = PIPE_VIDEO_FRAME;
+   /* from here on x and y count macroblocks */
+   x /= 16;
+   y /= 16;
+   /* one coded macroblock per iteration */
+   while (1) {
+      int macroblock_modes;
+      int mba_inc;
+      const MBAtab * mba;
+
+      vl_vlc_needbits(&bs->vlc);
+
+      macroblock_modes = get_macroblock_modes(bs, picture); //macroblock_modes()
+      dct_type = macroblock_modes & DCT_TYPE_INTERLACED ?
+         PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
+      /* weights follow the prediction directions in use; the intra path overrides them further down */
+      switch(macroblock_modes & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD)) {
+      case (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD):
+         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF;
+         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_HALF;
+         break;
+
+      default:
+      case MACROBLOCK_MOTION_FORWARD:
+         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+         break;
+
+      case MACROBLOCK_MOTION_BACKWARD:
+         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+         break;
+      }
+
+      /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
+      if (macroblock_modes & MACROBLOCK_QUANT)
+         picture->quantizer_scale = get_quantizer_scale(bs, picture);
+
+      if (macroblock_modes & MACROBLOCK_INTRA) {
+         /* an intra macroblock either carries a concealment vector (forward only) or clears both vectors */
+         if (picture->concealment_motion_vectors) {
+            if (picture->picture_structure == FRAME_PICTURE)
+               motion_fr_conceal(bs, picture->f_code[0], &mv_fwd);
+            else
+               motion_fi_conceal(bs, picture->f_code[0], &mv_fwd);
+
+         } else {
+            mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
+            mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
+         }
+         mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+         mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+         /* four plane-0 blocks in a 2x2 arrangement, then one block each for planes 1 and 2 */
+         // unravaled loop of 6 block(i) calls in macroblock()
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type);
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type);
+         slice_intra_DCT(bs, picture, 1, x, y, dct_type);
+         slice_intra_DCT(bs, picture, 2, x, y, dct_type);
+         /* for D_TYPE pictures one more bit is skipped after the blocks */
+         if (picture->picture_coding_type == D_TYPE) {
+            vl_vlc_needbits(&bs->vlc);
+            vl_vlc_dumpbits(&bs->vlc, 1);
+         }
+
+      } else {
+         if (picture->picture_structure == FRAME_PICTURE) /* frame pictures use the motion_fr_* parsers */
+            switch (macroblock_modes & MOTION_TYPE_MASK) {
+            case MC_FRAME:
+               if (picture->mpeg1) {
+                  MOTION_CALL(motion_mp1, macroblock_modes);
+               } else {
+                  MOTION_CALL(motion_fr_frame, macroblock_modes);
+               }
+               break;
+
+            case MC_FIELD:
+               MOTION_CALL (motion_fr_field, macroblock_modes);
+               break;
+
+            case MC_DMV:
+               MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD);
+               break;
+
+            case 0:
+               /* non-intra mb without forward mv in a P picture */
+               mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
+               mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
+               break;
+            }
+         else /* field pictures use the motion_fi_* parsers */
+            switch (macroblock_modes & MOTION_TYPE_MASK) {
+            case MC_FIELD:
+               MOTION_CALL (motion_fi_field, macroblock_modes);
+               break;
+
+            case MC_16X8:
+               MOTION_CALL (motion_fi_16x8, macroblock_modes);
+               break;
+
+            case MC_DMV:
+               MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD);
+               break;
+
+            case 0:
+               /* non-intra mb without forward mv in a P picture */
+               mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
+               mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
+               break;
+            }
+         /* coded block pattern: one bit per block, 0x20 down to 0x01, in the order of the calls below */
+         if (macroblock_modes & MACROBLOCK_PATTERN) {
+            int coded_block_pattern = get_coded_block_pattern(bs);
+
+            // TODO  optimize not fully used for idct accel only mc.
+            if (coded_block_pattern & 0x20)
+               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type); // cc0  luma 0
+            if (coded_block_pattern & 0x10)
+               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type); // cc0 luma 1
+            if (coded_block_pattern & 0x08)
+               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type); // cc0 luma 2
+            if (coded_block_pattern & 0x04)
+               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type); // cc0 luma 3
+            if (coded_block_pattern & 0x2)
+               slice_non_intra_DCT(bs, picture, 1, x, y, dct_type); // cc1 croma
+            if (coded_block_pattern & 0x1)
+               slice_non_intra_DCT(bs, picture, 2, x, y, dct_type); // cc2 croma
+         }
+         /* a non-intra macroblock resets the DC predictors */
+         picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = picture->dc_dct_pred[2] = 0;
+      }
+      /* store the vectors of this macroblock and step on; leaves the function with false past the last row */
+      NEXT_MACROBLOCK;
+      /* decode the address increment that leads to the next coded macroblock */
+      vl_vlc_needbits(&bs->vlc);
+      mba_inc = 0;
+      while (1) {
+         if (bs->vlc.buf >= 0x10000000) {
+            mba = MBA_5 + (vl_vlc_ubits(&bs->vlc, 5) - 2);
+            break;
+         } else if (bs->vlc.buf >= 0x03000000) {
+            mba = MBA_11 + (vl_vlc_ubits(&bs->vlc, 11) - 24);
+            break;
+         } else switch (vl_vlc_ubits(&bs->vlc, 11)) {
+         case 8:		/* macroblock_escape */
+            mba_inc += 33;
+            /* pass through */
+         case 15:	/* macroblock_stuffing (MPEG1 only) */
+            vl_vlc_dumpbits(&bs->vlc, 11);
+            vl_vlc_needbits(&bs->vlc);
+            continue;
+         default:	/* end of slice, or error */
+            return true;
+         }
+      }
+      vl_vlc_dumpbits(&bs->vlc, mba->len);
+      mba_inc += mba->mba;
+      if (mba_inc) { /* a non-zero increment means mba_inc macroblocks were skipped */
+         //TODO  conversion to signed format signed format
+         picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = picture->dc_dct_pred[2] = 0;
+
+         switch(picture->picture_structure) {
+         case FRAME_PICTURE:
+            mv_fwd.top.field_select = mv_fwd.bottom.field_select = PIPE_VIDEO_FRAME;
+            mv_bwd.top.field_select = mv_bwd.bottom.field_select = PIPE_VIDEO_FRAME;
+            break;
+
+         case TOP_FIELD:
+            mv_fwd.top.field_select = mv_fwd.bottom.field_select = PIPE_VIDEO_TOP_FIELD;
+            mv_bwd.top.field_select = mv_bwd.bottom.field_select = PIPE_VIDEO_TOP_FIELD;
+            break;
+
+         case BOTTOM_FIELD:
+            mv_fwd.top.field_select = mv_fwd.bottom.field_select = PIPE_VIDEO_BOTTOM_FIELD;
+            mv_bwd.top.field_select = mv_bwd.bottom.field_select = PIPE_VIDEO_BOTTOM_FIELD;
+            break;
+         }
+         /* in P_TYPE pictures the skipped macroblocks get a zero forward vector with maximum weight */
+         if (picture->picture_coding_type == P_TYPE) {
+            mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
+            mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+         }
+         do {
+            NEXT_MACROBLOCK;
+         } while (--mba_inc);
+      }
+   }
+}
+
+/* Clear every field of *bs, record the picture size and call
+ * setup_scan_ptable(). */
+void
+vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height)
+{
+   assert(bs);
+
+   memset(bs, 0, sizeof *bs);
+   bs->height = height;
+   bs->width = width;
+   setup_scan_ptable();
+}
+
+/* Attach the caller's output arrays to bs and preset the motion vectors of
+ * references 0 and 1: zero displacement, frame select, maximum weight for
+ * reference 0 and minimum weight for reference 1. */
+void
+vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
+                        short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES])
+{
+   unsigned n;
+
+   assert(bs);
+   assert(ycbcr_stream && ycbcr_buffer);
+   assert(mv_stream);
+
+   for (n = 0; n < VL_MAX_PLANES; ++n) {
+      bs->ycbcr_stream[n] = ycbcr_stream[n];
+      bs->ycbcr_buffer[n] = ycbcr_buffer[n];
+   }
+   for (n = 0; n < VL_MAX_REF_FRAMES; ++n)
+      bs->mv_stream[n] = mv_stream[n];
+
+   // TODO
+   for (n = 0; n < bs->width/16*bs->height/16; ++n) {
+      struct pipe_motionvector *fwd = &bs->mv_stream[0][n];
+      struct pipe_motionvector *bwd = &bs->mv_stream[1][n];
+
+      fwd->top.x = fwd->top.y = fwd->bottom.x = fwd->bottom.y = 0;
+      fwd->top.field_select = fwd->bottom.field_select = PIPE_VIDEO_FRAME;
+      fwd->top.weight = fwd->bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
+
+      bwd->top.x = bwd->top.y = bwd->bottom.x = bwd->bottom.y = 0;
+      bwd->top.field_select = bwd->bottom.field_select = PIPE_VIDEO_FRAME;
+      bwd->top.weight = bwd->bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
+   }
+}
+
+/* Decode slices from buffer until decode_slice() returns false; decoded
+ * blocks are counted per plane in the caller's num_ycbcr_blocks[]. */
+void
+vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
+                   struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3])
+{
+   assert(bs);
+   assert(num_ycbcr_blocks);
+   assert(buffer && num_bytes);
+
+   bs->num_ycbcr_blocks = num_ycbcr_blocks;
+   vl_vlc_init(&bs->vlc, buffer, num_bytes);
+   do { /* one slice per pass */ } while (decode_slice(bs, picture));
+}
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
new file mode 100644
index 00000000000..4e48a9faa2f
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h
@@ -0,0 +1,59 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_mpeg12_bitstream_h
+#define vl_mpeg12_bitstream_h
+
+#include "vl_defines.h"
+#include "vl_vlc.h"
+
+struct vl_mpg12_bs
+{
+   unsigned width, height; /* presumably the values given to vl_mpg12_bs_init() -- TODO confirm */
+
+   struct vl_vlc vlc; /* bit reader, re-initialised by every vl_mpg12_bs_decode() call */
+
+   unsigned *num_ycbcr_blocks; /* points at the array passed to vl_mpg12_bs_decode() */
+
+   struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; /* set by vl_mpg12_bs_set_buffers() */
+   short *ycbcr_buffer[VL_MAX_PLANES];                   /* set by vl_mpg12_bs_set_buffers() */
+
+   struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]; /* set by vl_mpg12_bs_set_buffers() */
+};
+
+void
+vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height);
+
+void
+vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES],
+                        short *ycbcr_buffer[VL_MAX_PLANES], struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES]);
+
+void
+vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
+                   struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]);
+
+#endif /* vl_mpeg12_bitstream_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 74ec4b1db7b..b78844b9cfd 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -346,6 +346,19 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
 
       buf->texels[i] = dec->pipe->transfer_map(dec->pipe, buf->tex_transfer[i]);
    }
+
+   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+      struct pipe_ycbcr_block *ycbcr_stream[VL_MAX_PLANES];
+      struct pipe_motionvector *mv_stream[VL_MAX_REF_FRAMES];
+
+      for (i = 0; i < VL_MAX_PLANES; ++i)
+         ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i);
+
+      for (i = 0; i < VL_MAX_REF_FRAMES; ++i)
+         mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
+
+      vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
+   }
 }
 
 static struct pipe_ycbcr_block *
@@ -389,6 +402,17 @@ vl_mpeg12_buffer_get_mv_stream(struct pipe_video_decode_buffer *buffer, int ref_
    return vl_vb_get_mv_stream(&buf->vertex_stream, ref_frame);
 }
 
+static void
+vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
+                                  unsigned num_bytes, const void *data,
+                                  struct pipe_mpeg12_picture_desc *picture,
+                                  unsigned num_ycbcr_blocks[3])
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   /* hand the raw bitstream to the parser embedded in this decode buffer */
+   vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks);
+}
+
 static void
 vl_mpeg12_buffer_unmap(struct pipe_video_decode_buffer *buffer)
 {
@@ -462,6 +486,7 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer;
    buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
    buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
+   buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream;
    buffer->base.unmap = vl_mpeg12_buffer_unmap;
 
    if (!vl_vb_init(&buffer->vertex_stream, dec->pipe,
@@ -479,6 +504,9 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    if (!init_zscan_buffer(buffer))
       goto error_zscan;
 
+   if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
+      vl_mpg12_bs_init(&buffer->bs, dec->base.width, dec->base.height);
+
    return &buffer->base;
 
 error_zscan:
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index e483ace03b4..66356694b59 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -30,6 +30,7 @@
 
 #include <pipe/p_video_context.h>
 
+#include "vl_mpeg12_bitstream.h"
 #include "vl_zscan.h"
 #include "vl_idct.h"
 #include "vl_mc.h"
@@ -80,6 +81,7 @@ struct vl_mpeg12_buffer
    struct pipe_video_buffer *idct_source;
    struct pipe_video_buffer *mc_source;
 
+   struct vl_mpg12_bs bs;
    struct vl_zscan_buffer zscan[VL_MAX_PLANES];
    struct vl_idct_buffer idct[VL_MAX_PLANES];
    struct vl_mc_buffer mc[VL_MAX_PLANES];
diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h
new file mode 100644
index 00000000000..8c5b3aca47d
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_vlc.h
@@ -0,0 +1,138 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * This file is based upon slice_xvmc.c and vlc.h from the xine project,
+ * which in turn is based on mpeg2dec. The following is the original copyright:
+ *
+ * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef vl_vlc_h
+#define vl_vlc_h
+
+struct vl_vlc
+{
+   uint32_t buf; /* 32 bit working set; the next unread bit is the MSB */
+   int bits;     /* bits consumed from the working set minus 16 (-16: all 32 valid) */
+   const uint8_t *ptr; /* next byte of stream data to load into buf */
+   const uint8_t *max; /* ptr+len of buffer */
+};
+
+static inline void /* (re)fill the whole working set from the next four stream bytes */
+vl_vlc_restart(struct vl_vlc *vlc)
+{
+   vlc->buf = ((uint32_t)vlc->ptr[0] << 24) | (vlc->ptr[1] << 16) | (vlc->ptr[2] << 8) | vlc->ptr[3];
+   vlc->bits = -16; /* all 32 bits valid; ptr[0] is widened above because int << 24 can overflow */
+   vlc->ptr += 4;   /* NOTE(review): no check against vlc->max, callers must supply >= 4 bytes */
+}
+
+static inline void
+vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len)
+{
+   vlc->ptr = data;
+   vlc->max = data + len; /* one past the last byte of the buffer */
+   vl_vlc_restart(vlc);   /* preloads data[0..3] into the working set */
+}
+
+static inline bool
+vl_vlc_getbyte(struct vl_vlc *vlc)
+{
+   vlc->buf <<= 8;           /* drop the oldest byte of the working set */
+   vlc->buf |= vlc->ptr[0];  /* append the next stream byte at the low end */
+   vlc->ptr++;
+   return vlc->ptr < vlc->max; /* false once ptr has reached the end of the buffer */
+}
+
+#define vl_vlc_getword(vlc, shift)                                      \
+do {                                                                    \
+   (vlc)->buf |= (((vlc)->ptr[0] << 8) | (vlc)->ptr[1]) << (shift);     \
+   (vlc)->ptr += 2;                                                     \
+} while (0) /* ORs the next two bytes (big endian), shifted left by shift, into buf */
+
+/* make sure that there are at least 16 valid bits in buf: refill 16 bits once bits >= 0 */
+#define vl_vlc_needbits(vlc)                    \
+do {                                            \
+    if ((vlc)->bits >= 0) {                      \
+	vl_vlc_getword(vlc, (vlc)->bits);       \
+	(vlc)->bits -= 16;                      \
+    }                                           \
+} while (0)
+
+/* fill all 32 bits of buf; NOTE(review): the shifts by n discard n unread bits (byte alignment?) */
+static inline void
+vl_vlc_need32bits(struct vl_vlc *vlc)
+{
+   vl_vlc_needbits(vlc);
+   if (vlc->bits > -8) {
+      unsigned n = -vlc->bits; /* shifted out below, leaving 16 valid bits */
+      vlc->buf <<= n;
+      vlc->buf |= *vlc->ptr << 8;
+      vlc->bits = -8;
+      vlc->ptr++;
+   }
+   if (vlc->bits > -16) {
+      unsigned n = -vlc->bits - 8; /* shifted out below, leaving 24 valid bits */
+      vlc->buf <<= n;
+      vlc->buf |= *vlc->ptr;
+      vlc->bits = -16;
+      vlc->ptr++;
+   }
+}
+
+/* remove num valid bits from the high end of buf and count them as consumed */
+#define vl_vlc_dumpbits(vlc, num)       \
+do {					\
+    (vlc)->buf <<= (num);		\
+    (vlc)->bits += (num);		\
+} while (0)
+
+/* take num bits from the high part of buf and zero extend them; num must be 1..32 (0 shifts by 32) */
+#define vl_vlc_ubits(vlc, num) (((uint32_t)((vlc)->buf)) >> (32 - (num)))
+
+/* as above but sign extended; relies on >> of a negative int32_t being an arithmetic shift */
+#define vl_vlc_sbits(vlc, num) (((int32_t)((vlc)->buf)) >> (32 - (num)))
+
+#endif /* vl_vlc_h */
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 80bbb6e1376..512b5b22d77 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -199,14 +199,13 @@ struct pipe_video_decode_buffer
     */
    struct pipe_motionvector *(*get_mv_stream)(struct pipe_video_decode_buffer *decbuf, int ref_frame);
 
-#if 0
    /**
     * decode a bitstream
     */
    void (*decode_bitstream)(struct pipe_video_decode_buffer *decbuf,
-                            unsigned num_bufs,
-                            struct pipe_buffer **bitstream_buf);
-#endif
+                            unsigned num_bytes, const void *data,
+                            struct pipe_mpeg12_picture_desc *picture,
+                            unsigned num_ycbcr_blocks[3]);
 
    /**
     * unmap decoder buffer before flushing
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 8bd84c2846a..54fb1b7a595 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -99,37 +99,43 @@ struct pipe_ycbcr_block
    enum pipe_mpeg12_dct_type coding:8;
 };
 
-#if 0
 struct pipe_picture_desc
 {
-   enum pipe_video_format format;
+   enum pipe_video_profile profile;
 };
 
 struct pipe_mpeg12_picture_desc
 {
    struct pipe_picture_desc base;
 
-   /* TODO: Use bitfields where possible? */
-   struct pipe_surface *forward_reference;
-   struct pipe_surface *backward_reference;
    unsigned picture_coding_type;
-   unsigned fcode;
-   unsigned intra_dc_precision;
    unsigned picture_structure;
-   unsigned top_field_first;
    unsigned frame_pred_frame_dct;
-   unsigned concealment_motion_vectors;
    unsigned q_scale_type;
-   unsigned intra_vlc_format;
    unsigned alternate_scan;
+   unsigned intra_dc_precision;
+   unsigned intra_vlc_format;
+   unsigned concealment_motion_vectors;
+   unsigned f_code[2][2];
+
+   bool mpeg1;
+   uint8_t *intra_quantizer_matrix;
+   uint8_t *non_intra_quantizer_matrix;
+
+   /* predictor for DC coefficients in intra blocks */
+   int16_t dc_dct_pred[3];
+
+   int quantizer_scale;
+
+#if 0
+   /* TODO: Use bitfields where possible? */
+   unsigned top_field_first;
    unsigned full_pel_forward_vector;
    unsigned full_pel_backward_vector;
-   struct pipe_buffer *intra_quantizer_matrix;
-   struct pipe_buffer *non_intra_quantizer_matrix;
    struct pipe_buffer *chroma_intra_quantizer_matrix;
    struct pipe_buffer *chroma_non_intra_quantizer_matrix;
-};
 #endif
+};
 
 #ifdef __cplusplus
 }
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index d8111a8243f..8e09cb61b11 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -146,40 +146,56 @@ vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder,
                         uint32_t bitstream_buffer_count,
                         VdpBitstreamBuffer const *bitstream_buffers)
 {
+   struct pipe_mpeg12_picture_desc picture;
    struct pipe_video_buffer *ref_frames[2];
+   unsigned num_ycbcr_blocks[3] = { 0, 0, 0 };
+   unsigned i;
 
    debug_printf("[VDPAU] Decoding MPEG2\n");
 
    /* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
-   if (picture_info->backward_reference ==  VDP_INVALID_HANDLE)
+   if (picture_info->forward_reference ==  VDP_INVALID_HANDLE)
       ref_frames[0] = NULL;
    else {
-      ref_frames[0] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer;
+      ref_frames[0] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer;
       if (!ref_frames[0])
          return VDP_STATUS_INVALID_HANDLE;
    }
 
-   if (picture_info->forward_reference ==  VDP_INVALID_HANDLE)
+   if (picture_info->backward_reference ==  VDP_INVALID_HANDLE)
       ref_frames[1] = NULL;
    else {
-      ref_frames[1] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->forward_reference))->video_buffer;
+      ref_frames[1] = ((vlVdpSurface *)vlGetDataHTAB(picture_info->backward_reference))->video_buffer;
       if (!ref_frames[1])
          return VDP_STATUS_INVALID_HANDLE;
    }
 
-   //if (vlVdpMPEG2BitstreamToMacroblock(vpipe->screen, bitstream_buffers, bitstream_buffer_count,
-   //                                    &num_macroblocks, &pipe_macroblocks))
-   //{
-   //   debug_printf("[VDPAU] Error in frame-header. Skipping.\n");
-   //
-   //   ret = VDP_STATUS_OK;
-   //   goto skip_frame;
-   //}
+   memset(&picture, 0, sizeof(picture));
+   picture.picture_coding_type = picture_info->picture_coding_type;
+   picture.picture_structure = picture_info->picture_structure;
+   picture.frame_pred_frame_dct = picture_info->frame_pred_frame_dct;
+   picture.q_scale_type = picture_info->q_scale_type;
+   picture.alternate_scan = picture_info->alternate_scan;
+   picture.intra_dc_precision = picture_info->intra_dc_precision;
+   picture.intra_vlc_format = picture_info->intra_vlc_format;
+   picture.concealment_motion_vectors = picture_info->concealment_motion_vectors;
+   picture.f_code[0][0] = picture_info->f_code[0][0] - 1;
+   picture.f_code[0][1] = picture_info->f_code[0][1] - 1;
+   picture.f_code[1][0] = picture_info->f_code[1][0] - 1;
+   picture.f_code[1][1] = picture_info->f_code[1][1] - 1;
+
+   picture.intra_quantizer_matrix = picture_info->intra_quantizer_matrix;
+   picture.non_intra_quantizer_matrix = picture_info->non_intra_quantizer_matrix;
+
+   buffer->map(buffer);
+
+   for (i = 0; i < bitstream_buffer_count; ++i)
+      buffer->decode_bitstream(buffer, bitstream_buffers[i].bitstream_bytes,
+                               bitstream_buffers[i].bitstream, &picture, num_ycbcr_blocks);
+
+   buffer->unmap(buffer);
 
-   // TODO
-   //vpipe->set_decode_target(vpipe,t_surf);
-   //vpipe->decode_macroblocks(vpipe, p_surf, f_surf, num_macroblocks,
-   //                          (struct pipe_macroblock *)pipe_macroblocks, NULL);
+   decoder->flush_buffer(buffer, num_ycbcr_blocks, ref_frames, target);
 
    return VDP_STATUS_OK;
 }
@@ -218,8 +234,8 @@ vlVdpDecoderRender(VdpDecoder decoder,
    switch (vldecoder->decoder->profile)   {
    case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
    case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
-      return vlVdpDecoderRenderMpeg2(vldecoder->decoder, vldecoder->buffer,
-                                     vlsurf, (VdpPictureInfoMPEG1Or2 *)picture_info,
+      return vlVdpDecoderRenderMpeg2(vldecoder->decoder, vldecoder->buffer, vlsurf->video_buffer,
+                                     (VdpPictureInfoMPEG1Or2 *)picture_info,
                                      bitstream_buffer_count,bitstream_buffers);
       break;
 
-- 
cgit v1.2.3


From ee92f0fdad9bba687a24c072de8c00bb587a0f55 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 4 May 2011 19:35:19 +0200
Subject: vdpau: remove the unused and incomplete mpeg2 parser

---
 src/gallium/state_trackers/vdpau/Makefile          |   1 -
 .../state_trackers/vdpau/mpeg2_bitstream_parser.c  | 134 ---------------------
 .../state_trackers/vdpau/mpeg2_bitstream_parser.h  |  65 ----------
 3 files changed, 200 deletions(-)
 delete mode 100644 src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
 delete mode 100644 src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h

diff --git a/src/gallium/state_trackers/vdpau/Makefile b/src/gallium/state_trackers/vdpau/Makefile
index 0e68d4fe007..c1fd0eb7d0e 100644
--- a/src/gallium/state_trackers/vdpau/Makefile
+++ b/src/gallium/state_trackers/vdpau/Makefile
@@ -19,7 +19,6 @@ C_SOURCES = htab.c \
 	    decode.c \
 	    presentation.c \
 	    bitmap.c \
-	    mpeg2_bitstream_parser.c \
 	    output.c \
 	    preemption.c \
 	    mixer.c
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
deleted file mode 100644
index 182f3d44c45..00000000000
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010 Thomas Balling Sørensen.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-#include <stdio.h>
-#include <stdlib.h>
-#include "mpeg2_bitstream_parser.h"
-
-#if 0
-int
-vlVdpMPEG2NextStartCode(struct vdpMPEG2BitstreamParser *parser)
-{
-   uint32_t integer = 0xffffff00;
-   uint8_t * ptr_read = parser->ptr_bitstream;
-   int8_t * bytes_to_end;
-
-   bytes_to_end = parser->ptr_bitstream_end - parser->ptr_bitstream;
-
-   /* Read byte after byte, until startcode is found */
-   while(integer != 0x00000100) {
-      if (bytes_to_end <= 0) {
-         parser->state = MPEG2_BITSTREAM_DONE;
-         parser->code = 0;
-         return 0;
-      }
-      integer = ( integer | *ptr_read++ ) << 8;
-      bytes_to_end--;
-   }
-   parser->ptr_bitstream = ptr_read;
-   parser->code = parser->ptr_bitstream;
-   /* start_code found. rewind cursor a byte */
-   //parser->cursor -= 8;
-
-   return 0;
-}
-
-int
-vlVdpMPEG2BitstreamToMacroblock(struct pipe_screen *screen,
-                                VdpBitstreamBuffer const *bitstream_buffers,
-                                uint32_t bitstream_buffer_count,
-                                unsigned int *num_macroblocks,
-                                struct pipe_mpeg12_macroblock **pipe_macroblocks)
-{
-   bool b_header_done = false;
-   struct vdpMPEG2BitstreamParser parser;
-
-#if(1)
-   FILE *fp;
-
-   if ((fp = fopen("binout", "w"))==NULL) {
-      printf("Cannot open file.\n");
-      exit(1);
-   }
-   fwrite(bitstream_buffers[0].bitstream, 1, bitstream_buffers[0].bitstream_bytes, fp);
-   fclose(fp);
-
-#endif
-
-   debug_printf("[VDPAU] Starting decoding MPEG2 stream\n");
-
-   num_macroblocks[0] = 0;
-
-   memset(&parser,0,sizeof(parser));
-   parser.state = MPEG2_HEADER_START_CODE;
-   parser.ptr_bitstream = (unsigned char *)bitstream_buffers[0].bitstream;
-   parser.ptr_bitstream_end = parser.ptr_bitstream + bitstream_buffers[0].bitstream_bytes;
-
-   /* Main header parser loop */
-   while(!b_header_done) {
-      switch (parser.state) {
-      case MPEG2_SEEK_HEADER:
-         if (vlVdpMPEG2NextStartCode(&parser))
-            exit(1);
-         break;
-         /* Start_code found */
-         switch (parser.code) {
-         /* sequence_header_code */
-         case 0xB3:
-            debug_printf("[VDPAU][Bitstream parser] Sequence header code found\n");
-            /* We dont need to read this, because we already have this information */
-            break;
-         case 0xB5:
-            debug_printf("[VDPAU][Bitstream parser] Extension start code found\n");
-            //exit(1);
-            break;
-         case 0xB8:
-            debug_printf("[VDPAU][Bitstream parser] Extension start code found\n");
-            //exit(1);
-            break;
-         }
-         break;
-      case MPEG2_BITSTREAM_DONE:
-         if (parser.cur_bitstream < bitstream_buffer_count - 1) {
-            debug_printf("[VDPAU][Bitstream parser] Done parsing current bitstream. Moving to the next\n");
-            parser.cur_bitstream++;
-            parser.ptr_bitstream = (unsigned char *)bitstream_buffers[parser.cur_bitstream].bitstream;
-            parser.ptr_bitstream_end = parser.ptr_bitstream + bitstream_buffers[parser.cur_bitstream].bitstream_bytes;
-            parser.state = MPEG2_HEADER_START_CODE;
-         }
-         else {
-            debug_printf("[VDPAU][Bitstream parser] Done with frame\n");
-            exit(0);
-            // return 0;
-         }
-         break;
-      }
-   }
-
-   return 0;
-}
-
-#endif
diff --git a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h b/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
deleted file mode 100644
index 2f8a14996c5..00000000000
--- a/src/gallium/state_trackers/vdpau/mpeg2_bitstream_parser.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010 Thomas Balling Sørensen.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef MPEG2_BITSTREAM_PARSER_H
-#define MPEG2_BITSTREAM_PARSER_H
-
-#include <vdpau/vdpau.h>
-#include <pipe/p_video_state.h>
-#include "vdpau_private.h"
-
-enum vdpMPEG2States
-{
-   MPEG2_SEEK_HEADER,
-   MPEG2_HEADER_DONE,
-   MPEG2_BITSTREAM_DONE,
-   MPEG2_HEADER_START_CODE
-};
-
-struct vdpMPEG2BitstreamParser
-{
-   enum vdpMPEG2States state;
-   uint32_t cur_bitstream;
-   const uint8_t *ptr_bitstream_end;
-   const uint8_t *ptr_bitstream;
-   uint8_t code;
-
-   /* The decoded bitstream goes here: */
-   /* Sequence_header_info */
-   uint32_t horizontal_size_value;
-};
-
-#if 0
-int
-vlVdpMPEG2BitstreamToMacroblock(struct pipe_screen *screen,
-                                VdpBitstreamBuffer const *bitstream_buffers,
-                                uint32_t bitstream_buffer_count,
-                                unsigned int *num_macroblocks,
-                                struct pipe_mpeg12_macroblock **pipe_macroblocks);
-#endif
-
-#endif // MPEG2_BITSTREAM_PARSER_H
-- 
cgit v1.2.3


From ff20be919a44d1398f4f5789a56eb6c755a2d158 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 4 May 2011 19:38:40 +0200
Subject: [g3dvl] remove the unused bitstream parser components

---
 src/gallium/auxiliary/Makefile                 |   5 +-
 src/gallium/auxiliary/vl/vl_bitstream_parser.c | 208 -------------------------
 src/gallium/auxiliary/vl/vl_bitstream_parser.h |  67 --------
 3 files changed, 2 insertions(+), 278 deletions(-)
 delete mode 100644 src/gallium/auxiliary/vl/vl_bitstream_parser.c
 delete mode 100644 src/gallium/auxiliary/vl/vl_bitstream_parser.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index 3fd1d5923c0..a30accaa6ff 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -149,10 +149,9 @@ C_SOURCES = \
 	util/u_upload_mgr.c \
 	util/u_vbuf_mgr.c \
 	vl/vl_context.c \
-	vl/vl_bitstream_parser.c \
-	vl/vl_mpeg12_decoder.c \
-	vl/vl_compositor.c \
 	vl/vl_csc.c \
+	vl/vl_compositor.c \
+	vl/vl_mpeg12_decoder.c \
 	vl/vl_mpeg12_bitstream.c \
 	vl/vl_zscan.c \
         vl/vl_idct.c \
diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.c b/src/gallium/auxiliary/vl/vl_bitstream_parser.c
deleted file mode 100644
index f07b3443b92..00000000000
--- a/src/gallium/auxiliary/vl/vl_bitstream_parser.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#include "vl_bitstream_parser.h"
-#include <assert.h>
-#include <limits.h>
-#include <util/u_memory.h>
-#include <stdio.h>
-
-inline void endian_swap_ushort(unsigned short *x)
-{
-    x[0] = (x[0]>>8) | 
-        (x[0]<<8);
-}
-
-inline void endian_swap_uint(unsigned int *x)
-{
-    x[0] = (x[0]>>24) | 
-        ((x[0]<<8) & 0x00FF0000) |
-        ((x[0]>>8) & 0x0000FF00) |
-        (x[0]<<24);
-}
-
-inline void endian_swap_ulonglong(unsigned long long *x)
-{
-    x[0] = (x[0]>>56) | 
-        ((x[0]<<40) & 0x00FF000000000000) |
-        ((x[0]<<24) & 0x0000FF0000000000) |
-        ((x[0]<<8)  & 0x000000FF00000000) |
-        ((x[0]>>8)  & 0x00000000FF000000) |
-        ((x[0]>>24) & 0x0000000000FF0000) |
-        ((x[0]>>40) & 0x000000000000FF00) |
-        (x[0]<<56);
-}
-
-static unsigned
-grab_bits(unsigned cursor, unsigned how_many_bits, unsigned bitstream_elt)
-{
-   unsigned excess_bits = sizeof(unsigned) * CHAR_BIT - how_many_bits;
-	
-   assert(cursor < sizeof(unsigned) * CHAR_BIT);
-   assert(how_many_bits > 0 && how_many_bits <= sizeof(unsigned) * CHAR_BIT);
-   assert(cursor + how_many_bits <= sizeof(unsigned) * CHAR_BIT);
-   
-   #ifndef PIPE_ARCH_BIG_ENDIAN 
-   switch (sizeof(unsigned))  {
-	   case 2:
-			endian_swap_ushort(&bitstream_elt);
-			break;
-	   case 4:
-			endian_swap_uint(&bitstream_elt);
-			break;
-	   case 8:
-			endian_swap_ulonglong(&bitstream_elt);
-			break;
-   }
-   #endif // !PIPE_ARCH_BIG_ENDIAN 
-   
-	return (bitstream_elt << cursor) >> (excess_bits);
-}
-
-static unsigned
-show_bits(unsigned cursor, unsigned how_many_bits, const unsigned *bitstream)
-{	
-   unsigned cur_int = cursor / (sizeof(unsigned) * CHAR_BIT);
-   unsigned cur_bit = cursor % (sizeof(unsigned) * CHAR_BIT);
-	
-   assert(bitstream);
-	
-   if (cur_bit + how_many_bits > sizeof(unsigned) * CHAR_BIT) {
-      unsigned lower = grab_bits(cur_bit, sizeof(unsigned) * CHAR_BIT - cur_bit,
-                                 bitstream[cur_int]);
-      unsigned upper = grab_bits(0, cur_bit + how_many_bits - sizeof(unsigned) * CHAR_BIT,
-                                 bitstream[cur_int + 1]);
-      return lower | upper << (sizeof(unsigned) * CHAR_BIT - cur_bit);
-   }
-   else
-      return grab_bits(cur_bit, how_many_bits, bitstream[cur_int]);
-}
-
-bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser,
-                              unsigned num_bitstreams,
-                              const void **bitstreams,
-                              const unsigned *sizes)
-{
-   assert(parser);
-   assert(num_bitstreams);
-   assert(bitstreams);
-   assert(sizes);
-
-   parser->num_bitstreams = num_bitstreams;
-   parser->bitstreams = (const unsigned**)bitstreams;
-   parser->sizes = sizes;
-   parser->cur_bitstream = 0;
-   parser->cursor = 0;
-
-   return true;
-}
-
-void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser)
-{
-   assert(parser);
-}
-
-unsigned
-vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser,
-                             unsigned how_many_bits)
-{
-   unsigned bits;
-
-   assert(parser);
-
-   bits = vl_bitstream_parser_show_bits(parser, how_many_bits);
-
-   vl_bitstream_parser_forward(parser, how_many_bits);
-
-   return bits;
-}
-
-unsigned
-vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser,
-                              unsigned how_many_bits)
-{	
-   unsigned bits = 0;
-   unsigned shift = 0;
-   unsigned cursor;
-   unsigned cur_bitstream;
-
-   assert(parser);
-
-   cursor = parser->cursor;
-   cur_bitstream = parser->cur_bitstream;
-
-   while (1) {
-      unsigned bits_left = parser->sizes[cur_bitstream] * CHAR_BIT - cursor;
-      unsigned bits_to_show = how_many_bits > bits_left ? bits_left : how_many_bits;
-
-      bits |= show_bits(cursor, bits_to_show,
-                        parser->bitstreams[cur_bitstream]) << shift;
-		
-      if (how_many_bits > bits_to_show) {
-         how_many_bits -= bits_to_show;
-         cursor = 0;
-         ++cur_bitstream;
-         shift += bits_to_show;
-      }
-      else
-         break;
-   }
-
-   return bits;
-}
-
-void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser,
-                                 unsigned how_many_bits)
-{
-   assert(parser);
-   assert(how_many_bits);
-
-   parser->cursor += how_many_bits;
-
-   while (parser->cursor > parser->sizes[parser->cur_bitstream] * CHAR_BIT) {
-      parser->cursor -= parser->sizes[parser->cur_bitstream++] * CHAR_BIT;
-      assert(parser->cur_bitstream < parser->num_bitstreams);
-   }
-}
-
-void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser,
-                                unsigned how_many_bits)
-{
-   signed c;
-	
-   assert(parser);
-   assert(how_many_bits);
-	
-   c = parser->cursor - how_many_bits;
-
-   while (c < 0) {
-      c += parser->sizes[parser->cur_bitstream--] * CHAR_BIT;
-      assert(parser->cur_bitstream < parser->num_bitstreams);
-   }
-
-   parser->cursor = (unsigned)c;
-}
diff --git a/src/gallium/auxiliary/vl/vl_bitstream_parser.h b/src/gallium/auxiliary/vl/vl_bitstream_parser.h
deleted file mode 100644
index eeb51dd4295..00000000000
--- a/src/gallium/auxiliary/vl/vl_bitstream_parser.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-#ifndef vl_bitstream_parser_h
-#define vl_bitstream_parser_h
-
-#include "pipe/p_compiler.h"
-
-struct vl_bitstream_parser
-{
-   unsigned num_bitstreams;
-   const unsigned **bitstreams;
-   const unsigned *sizes;
-   unsigned cur_bitstream;
-   unsigned cursor;
-};
-
-inline void endian_swap_ushort(unsigned short *x);
-inline void endian_swap_uint(unsigned int *x);
-inline void endian_swap_ulonglong(unsigned long long *x);
-
-bool vl_bitstream_parser_init(struct vl_bitstream_parser *parser,
-                              unsigned num_bitstreams,
-                              const void **bitstreams,
-                              const unsigned *sizes);
-
-void vl_bitstream_parser_cleanup(struct vl_bitstream_parser *parser);
-
-unsigned
-vl_bitstream_parser_get_bits(struct vl_bitstream_parser *parser,
-                             unsigned how_many_bits);
-
-unsigned
-vl_bitstream_parser_show_bits(struct vl_bitstream_parser *parser,
-                              unsigned how_many_bits);
-
-void vl_bitstream_parser_forward(struct vl_bitstream_parser *parser,
-                                 unsigned how_many_bits);
-
-void vl_bitstream_parser_rewind(struct vl_bitstream_parser *parser,
-                                unsigned how_many_bits);
-
-#endif /* vl_bitstream_parser_h */
-- 
cgit v1.2.3


From 7709e6ebc3d9f159063e40ca217d61f20ce507f0 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 4 May 2011 19:44:20 +0200
Subject: vdpau: remove empty color.c file

---
 src/gallium/state_trackers/vdpau/color.c | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 src/gallium/state_trackers/vdpau/color.c

diff --git a/src/gallium/state_trackers/vdpau/color.c b/src/gallium/state_trackers/vdpau/color.c
deleted file mode 100644
index e69de29bb2d..00000000000
-- 
cgit v1.2.3


From 08f3a7cf7e9133f50adf33f800aa3696c909347f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 4 May 2011 19:58:33 +0200
Subject: vdpau: make state tracker far less noisy

---
 src/gallium/state_trackers/vdpau/bitmap.c        |  4 ++--
 src/gallium/state_trackers/vdpau/decode.c        | 10 ++++-----
 src/gallium/state_trackers/vdpau/device.c        | 12 +++++-----
 src/gallium/state_trackers/vdpau/mixer.c         |  8 +++----
 src/gallium/state_trackers/vdpau/output.c        |  4 ++--
 src/gallium/state_trackers/vdpau/presentation.c  |  6 ++---
 src/gallium/state_trackers/vdpau/query.c         | 18 +++++++--------
 src/gallium/state_trackers/vdpau/surface.c       |  2 +-
 src/gallium/state_trackers/vdpau/vdpau_private.h | 28 +++++++++++++++++++++++-
 9 files changed, 59 insertions(+), 33 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/bitmap.c b/src/gallium/state_trackers/vdpau/bitmap.c
index b570f2752e4..e336568df47 100644
--- a/src/gallium/state_trackers/vdpau/bitmap.c
+++ b/src/gallium/state_trackers/vdpau/bitmap.c
@@ -26,8 +26,8 @@
  **************************************************************************/
 
 #include <vdpau/vdpau.h>
+
 #include "vdpau_private.h"
-#include <util/u_debug.h>
 
 VdpStatus
 vlVdpBitmapSurfaceCreate(VdpDevice device,
@@ -36,7 +36,7 @@ vlVdpBitmapSurfaceCreate(VdpDevice device,
                          VdpBool frequently_accessed,
                          VdpBitmapSurface *surface)
 {
-   debug_printf("[VDPAU] Creating a bitmap surface\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Creating a bitmap surface\n");
    if (!surface)
       return VDP_STATUS_INVALID_POINTER;
 
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 8e09cb61b11..3a366d35a93 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -46,7 +46,7 @@ vlVdpDecoderCreate(VdpDevice device,
    vlVdpDecoder *vldecoder;
    VdpStatus ret;
 
-   debug_printf("[VDPAU] Creating decoder\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Creating decoder\n");
 
    if (!decoder)
       return VDP_STATUS_INVALID_POINTER;
@@ -95,7 +95,7 @@ vlVdpDecoderCreate(VdpDevice device,
       goto error_handle;
    }
 
-   debug_printf("[VDPAU] Decoder created succesfully\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Decoder created succesfully\n");
 
    return VDP_STATUS_OK;
 
@@ -115,7 +115,7 @@ vlVdpDecoderDestroy(VdpDecoder decoder)
 {
    vlVdpDecoder *vldecoder;
 
-   debug_printf("[VDPAU] Destroying decoder\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Destroying decoder\n");
 
    vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
    if (!vldecoder)
@@ -151,7 +151,7 @@ vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder,
    unsigned num_ycbcr_blocks[3] = { 0, 0, 0 };
    unsigned i;
 
-   debug_printf("[VDPAU] Decoding MPEG2\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Decoding MPEG2\n");
 
    /* if surfaces equals VDP_STATUS_INVALID_HANDLE, they are not used */
    if (picture_info->forward_reference ==  VDP_INVALID_HANDLE)
@@ -210,7 +210,7 @@ vlVdpDecoderRender(VdpDecoder decoder,
    vlVdpDecoder *vldecoder;
    vlVdpSurface *vlsurf;
 
-   debug_printf("[VDPAU] Decoding\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Decoding\n");
 
    if (!(picture_info && bitstream_buffers))
       return VDP_STATUS_INVALID_POINTER;
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index c0bf56edc2e..30c6b7aae4b 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -77,7 +77,7 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
    }
 
    *get_proc_address = &vlVdpGetProcAddress;
-   debug_printf("[VDPAU] Device created succesfully\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Device created succesfully\n");
 
    return VDP_STATUS_OK;
 
@@ -100,7 +100,7 @@ vlVdpPresentationQueueTargetCreateX11(VdpDevice device, Drawable drawable,
    vlVdpPresentationQueueTarget *pqt;
    VdpStatus ret;
 
-   debug_printf("[VDPAU] Creating PresentationQueueTarget\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Creating PresentationQueueTarget\n");
 
    if (!drawable)
       return VDP_STATUS_INVALID_HANDLE;
@@ -134,7 +134,7 @@ vlVdpPresentationQueueTargetDestroy(VdpPresentationQueueTarget presentation_queu
 {
    vlVdpPresentationQueueTarget *pqt;
 
-   debug_printf("[VDPAU] Destroying PresentationQueueTarget\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Destroying PresentationQueueTarget\n");
 
    pqt = vlGetDataHTAB(presentation_queue_target);
    if (!pqt)
@@ -149,7 +149,7 @@ vlVdpPresentationQueueTargetDestroy(VdpPresentationQueueTarget presentation_queu
 VdpStatus
 vlVdpDeviceDestroy(VdpDevice device)
 {
-   debug_printf("[VDPAU] Destroying destroy\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Destroying destroy\n");
 
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
@@ -158,7 +158,7 @@ vlVdpDeviceDestroy(VdpDevice device)
    FREE(dev);
    vlDestroyHTAB();
 
-   debug_printf("[VDPAU] Device destroyed succesfully\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Device destroyed succesfully\n");
 
    return VDP_STATUS_OK;
 }
@@ -176,7 +176,7 @@ vlVdpGetProcAddress(VdpDevice device, VdpFuncId function_id, void **function_poi
    if (!vlGetFuncFTAB(function_id, function_pointer))
       return VDP_STATUS_INVALID_FUNC_ID;
 
-   debug_printf("[VDPAU] Got proc adress %p for id %d\n", *function_pointer, function_id);
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Got proc adress %p for id %d\n", *function_pointer, function_id);
 
    return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index 0c0d200c6d4..85f4e1541ab 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -48,7 +48,7 @@ vlVdpVideoMixerCreate(VdpDevice device,
    VdpStatus ret;
    float csc[16];
 
-   debug_printf("[VDPAU] Creating VideoMixer\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Creating VideoMixer\n");
 
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
@@ -91,7 +91,7 @@ vlVdpVideoMixerDestroy(VdpVideoMixer mixer)
 {
    vlVdpVideoMixer *vmixer;
 
-   debug_printf("[VDPAU] Destroying VideoMixer\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Destroying VideoMixer\n");
 
    vmixer = vlGetDataHTAB(mixer);
    if (!vmixer)
@@ -110,7 +110,7 @@ vlVdpVideoMixerSetFeatureEnables(VdpVideoMixer mixer,
                                  VdpVideoMixerFeature const *features,
                                  VdpBool const *feature_enables)
 {
-   debug_printf("[VDPAU] Setting VideoMixer features\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Setting VideoMixer features\n");
 
    if (!(features && feature_enables))
       return VDP_STATUS_INVALID_POINTER;
@@ -228,7 +228,7 @@ vlVdpGenerateCSCMatrix(VdpProcamp *procamp,
                        VdpColorStandard standard,
                        VdpCSCMatrix *csc_matrix)
 {
-   debug_printf("[VDPAU] Generating CSCMatrix\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Generating CSCMatrix\n");
    if (!(csc_matrix && procamp))
       return VDP_STATUS_INVALID_POINTER;
 
diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index 35b75ab3aa7..0257d38017a 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -46,7 +46,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
 
    vlVdpOutputSurface *vlsurface = NULL;
 
-   debug_printf("[VDPAU] Creating output surface\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Creating output surface\n");
    if (!(width && height))
       return VDP_STATUS_INVALID_SIZE;
 
@@ -110,7 +110,7 @@ vlVdpOutputSurfaceDestroy(VdpOutputSurface surface)
 {
    vlVdpOutputSurface *vlsurface;
 
-   debug_printf("[VDPAU] Destroying output surface\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Destroying output surface\n");
 
    vlsurface = vlGetDataHTAB(surface);
    if (!vlsurface)
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 1e2c78616e9..2f029f07fb1 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -43,7 +43,7 @@ vlVdpPresentationQueueCreate(VdpDevice device,
    struct pipe_video_context *context;
    VdpStatus ret;
 
-   _debug_printf("[VDPAU] Creating PresentationQueue\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Creating PresentationQueue\n");
 
    if (!presentation_queue)
       return VDP_STATUS_INVALID_POINTER;
@@ -91,7 +91,7 @@ vlVdpPresentationQueueDestroy(VdpPresentationQueue presentation_queue)
 {
    vlVdpPresentationQueue *pq;
 
-   _debug_printf("[VDPAU] Destroying PresentationQueue\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Destroying PresentationQueue\n");
 
    pq = vlGetDataHTAB(presentation_queue);
    if (!pq)
@@ -183,7 +183,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
 
       sprintf(cmd, "xwd -id %d -out vdpau_frame_%08d.xwd", (int)pq->drawable, ++framenum);
       if (system(cmd) != 0)
-         _debug_printf("[XvMC] Dumping surface %d failed.\n", surface);
+         VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Dumping surface %d failed.\n", surface);
    }
 
    return VDP_STATUS_OK;
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
index 97522057a10..abe5b8f6701 100644
--- a/src/gallium/state_trackers/vdpau/query.c
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -62,7 +62,7 @@ vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chrom
    uint32_t max_2d_texture_level;
    VdpStatus ret;
 
-   debug_printf("[VDPAU] Querying video surfaces\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying video surfaces\n");
 
    if (!(is_supported && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
@@ -105,7 +105,7 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
 {
    struct vl_screen *vlscreen;
 
-   debug_printf("[VDPAU] Querying get put video surfaces\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying get put video surfaces\n");
 
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
@@ -141,7 +141,7 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
    uint32_t max_2d_texture_level;
    struct vl_screen *vlscreen;
 
-   debug_printf("[VDPAU] Querying decoder\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying decoder\n");
 
    if (!(is_supported && max_level && max_macroblocks && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
@@ -189,7 +189,7 @@ vlVdpOutputSurfaceQueryCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba
    if (!(is_supported && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
 
-   debug_printf("[VDPAU] Querying ouput surfaces\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying ouput surfaces\n");
 
    return VDP_STATUS_NO_IMPLEMENTATION;
 }
@@ -198,7 +198,7 @@ VdpStatus
 vlVdpOutputSurfaceQueryGetPutBitsNativeCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
                                                     VdpBool *is_supported)
 {
-   debug_printf("[VDPAU] Querying output surfaces get put native cap\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying output surfaces get put native cap\n");
 
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
@@ -213,7 +213,7 @@ vlVdpOutputSurfaceQueryPutBitsIndexedCapabilities(VdpDevice device,
                                                   VdpColorTableFormat color_table_format,
                                                   VdpBool *is_supported)
 {
-   debug_printf("[VDPAU] Querying output surfaces get put indexed cap\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying output surfaces get put indexed cap\n");
 
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
@@ -226,7 +226,7 @@ vlVdpOutputSurfaceQueryPutBitsYCbCrCapabilities(VdpDevice device, VdpRGBAFormat
                                                 VdpYCbCrFormat bits_ycbcr_format,
                                                 VdpBool *is_supported)
 {
-   debug_printf("[VDPAU] Querying output surfaces put ycrcb cap\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying output surfaces put ycrcb cap\n");
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
@@ -237,7 +237,7 @@ VdpStatus
 vlVdpBitmapSurfaceQueryCapabilities(VdpDevice device, VdpRGBAFormat surface_rgba_format,
                                     VdpBool *is_supported, uint32_t *max_width, uint32_t *max_height)
 {
-   debug_printf("[VDPAU] Querying bitmap surfaces\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying bitmap surfaces\n");
    if (!(is_supported && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
 
@@ -248,7 +248,7 @@ VdpStatus
 vlVdpVideoMixerQueryFeatureSupport(VdpDevice device, VdpVideoMixerFeature feature,
                                    VdpBool *is_supported)
 {
-   debug_printf("[VDPAU] Querying mixer feature support\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying mixer feature support\n");
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index dcbc6e61916..c2945c787da 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -44,7 +44,7 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
    vlVdpSurface *p_surf;
    VdpStatus ret;
 
-   _debug_printf("[VDPAU] Creating a surface\n");
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Creating a surface\n");
 
    if (!(width && height)) {
       ret = VDP_STATUS_INVALID_SIZE;
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 5931a2dda34..cbc51d57e36 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -28,12 +28,17 @@
 #ifndef VDPAU_PRIVATE_H
 #define VDPAU_PRIVATE_H
 
+#include <assert.h>
+
 #include <vdpau/vdpau.h>
 #include <vdpau/vdpau_x11.h>
+
 #include <pipe/p_compiler.h>
 #include <pipe/p_video_context.h>
+
+#include <util/u_debug.h>
+
 #include <vl_winsys.h>
-#include <assert.h>
 
 #define INFORMATION G3DVL VDPAU Driver Shared Library version VER_MAJOR.VER_MINOR
 #define QUOTEME(x) #x
@@ -288,4 +293,25 @@ VdpVideoMixerGetAttributeValues vlVdpVideoMixerGetAttributeValues;
 VdpVideoMixerDestroy vlVdpVideoMixerDestroy;
 VdpGenerateCSCMatrix vlVdpGenerateCSCMatrix;
 
+#define VDPAU_OUT   0
+#define VDPAU_ERR   1
+#define VDPAU_WARN  2
+#define VDPAU_TRACE 3
+
+static inline void VDPAU_MSG(unsigned int level, const char *fmt, ...)
+{
+   static int debug_level = -1;
+
+   if (debug_level == -1) {
+      debug_level = MAX2(debug_get_num_option("VDPAU_DEBUG", 0), 0);
+   }
+
+   if (level <= debug_level) {
+      va_list ap;
+      va_start(ap, fmt);
+      _debug_vprintf(fmt, ap);
+      va_end(ap);
+   }
+}
+
 #endif // VDPAU_PRIVATE_H
-- 
cgit v1.2.3


From 0f24c19eea80290f533d69403586d9fc6f4b36f7 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 4 May 2011 20:39:54 +0200
Subject: [g3dvl] remove the dubble zscan from the mpg12 decoder

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 82 ++++++++------------------
 1 file changed, 25 insertions(+), 57 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 8955ad5175b..da00f3730a1 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -480,21 +480,6 @@ static const uint8_t mpeg2_scan_alt_orig[64] =
    53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63
 };
 
-static uint8_t mpeg2_scan_alt_ptable[64];
-static uint8_t mpeg2_scan_norm_ptable[64];
-static uint8_t mpeg2_scan_orig_ptable[64];
-
-static inline void
-setup_scan_ptable( void )
-{
-   int i;
-   for (i=0; i<64; ++i) {
-      mpeg2_scan_norm_ptable[mpeg2_scan_norm_orig[i]] = mpeg2_scan_norm_orig[i];
-      mpeg2_scan_alt_ptable[mpeg2_scan_alt_orig[i]] = mpeg2_scan_alt_orig[i];
-      mpeg2_scan_orig_ptable[i] = i;
-   }
-}
-
 static const int non_linear_quantizer_scale[] = {
    0,  1,  2,  3,  4,  5,   6,   7,
    8, 10, 12, 14, 16, 18,  20,  22,
@@ -765,9 +750,8 @@ get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
 static inline void
 get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
 {
-   int i, j, l, val;
+   int i, j, val;
    const uint8_t *scan;
-   uint8_t *scan_ptable;
    uint8_t *quant_matrix = picture->intra_quantizer_matrix;
    int quantizer_scale = picture->quantizer_scale;
    int mismatch;
@@ -775,10 +759,8 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
    if (!picture->alternate_scan) {
       scan =  mpeg2_scan_norm_orig;
-      scan_ptable = mpeg2_scan_norm_ptable;
    } else {
       scan = mpeg2_scan_alt_orig;
-      scan_ptable = mpeg2_scan_alt_ptable;
    }
 
    i = 0;
@@ -796,11 +778,11 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             break;	/* end of block */
 
       normal_code:
-         l = scan_ptable[j = scan[i]];
+         j = scan[i];
 
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4;
+         val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
 
          /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
@@ -828,11 +810,11 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          if (i >= 64)
             break;	/* illegal, check needed to avoid buffer overflow */
 
-         l = scan_ptable[j = scan[i]];
+         j = scan[i];
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
-         val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[l]) / 16;
+         val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[j]) / 16;
 
          SATURATE (val);
          dest[j] = val;
@@ -876,9 +858,8 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 static inline void
 get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
 {
-   int i, j, l, val;
+   int i, j, val;
    const uint8_t *scan;
-   uint8_t *scan_ptable;
    uint8_t *quant_matrix = picture->intra_quantizer_matrix;
    int quantizer_scale = picture->quantizer_scale;
    int mismatch;
@@ -886,10 +867,8 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
    if (!picture->alternate_scan) {
       scan =  mpeg2_scan_norm_orig;
-      scan_ptable = mpeg2_scan_norm_ptable;
    } else {
       scan = mpeg2_scan_alt_orig;
-      scan_ptable = mpeg2_scan_alt_ptable;
    }
 
    i = 0;
@@ -906,10 +885,10 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          if (i < 64) {
 
          normal_code:
-            l = scan_ptable[j = scan[i]];
+            j = scan[i];
             bs->vlc.buf <<= tab->len;
             bs->vlc.bits += tab->len + 1;
-            val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4;
+            val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
 
             /* if (bitstream_get (1)) val = -val; */
             val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
@@ -936,11 +915,11 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             if (i >= 64)
                 break;	/* illegal, check against buffer overflow */
 
-            l = scan_ptable[j = scan[i]];
+            j = scan[i];
 
             vl_vlc_dumpbits(&bs->vlc, 12);
             vl_vlc_needbits(&bs->vlc);
-            val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[l]) / 16;
+            val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[j]) / 16;
 
             SATURATE (val);
             dest[j] = val;
@@ -985,9 +964,8 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 static inline void
 get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
 {
-   int i, j, l, val;
+   int i, j, val;
    const uint8_t *scan;
-   uint8_t *scan_ptable;
    uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
    int quantizer_scale = picture->quantizer_scale;
    int mismatch;
@@ -998,10 +976,8 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
    if (!picture->alternate_scan) {
       scan =  mpeg2_scan_norm_orig;
-      scan_ptable = mpeg2_scan_norm_ptable;
    } else {
       scan = mpeg2_scan_alt_orig;
-      scan_ptable = mpeg2_scan_alt_ptable;
    }
 
    vl_vlc_needbits(&bs->vlc);
@@ -1022,10 +998,10 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             break;	/* end of block */
 
       normal_code:
-         l = scan_ptable[j = scan[i]];
+         j = scan[i];
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[l]) >> 5;
+         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
 
          /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
@@ -1056,12 +1032,12 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          if (i >= 64)
             break;	/* illegal, check needed to avoid buffer overflow */
 
-         l = scan_ptable[j = scan[i]];
+         j = scan[i];
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
          val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1;
-         val = (val * quantizer_scale * quant_matrix[l]) / 32;
+         val = (val * quantizer_scale * quant_matrix[j]) / 32;
 
          SATURATE (val);
          dest[j] = val;
@@ -1104,9 +1080,8 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 static inline void
 get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
 {
-   int i, j, l, val;
+   int i, j, val;
    const uint8_t *scan;
-   uint8_t *scan_ptable;
    uint8_t *quant_matrix = picture->intra_quantizer_matrix;
    int quantizer_scale = picture->quantizer_scale;
    const DCTtab * tab;
@@ -1115,10 +1090,8 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
 
    if (!picture->alternate_scan) {
       scan =  mpeg2_scan_norm_orig;
-      scan_ptable = mpeg2_scan_norm_ptable;
    } else {
       scan = mpeg2_scan_alt_orig;
-      scan_ptable = mpeg2_scan_alt_ptable;
    }
 
    vl_vlc_needbits(&bs->vlc);
@@ -1133,10 +1106,10 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
             break;	/* end of block */
 
       normal_code:
-         l = scan_ptable[j = scan[i]];
+         j = scan[i];
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4;
+         val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
 
          /* oddification */
          val = (val - 1) | 1;
@@ -1166,7 +1139,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
          if (i >= 64)
             break;	/* illegal, check needed to avoid buffer overflow */
 
-         l = scan_ptable[j = scan[i]];
+         j = scan[i];
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -1175,7 +1148,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
             vl_vlc_dumpbits(&bs->vlc, 8);
             val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
          }
-         val = (val * quantizer_scale * quant_matrix[l]) / 16;
+         val = (val * quantizer_scale * quant_matrix[j]) / 16;
 
          /* oddification */
          val = (val + ~SBITS (val, 1)) | 1;
@@ -1219,9 +1192,8 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
 static inline void
 get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
 {
-   int i, j, l, val;
+   int i, j, val;
    const uint8_t * scan;
-   uint8_t *scan_ptable;
    uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
    int quantizer_scale = picture->quantizer_scale;
    const DCTtab * tab;
@@ -1230,10 +1202,8 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
 
    if (!picture->alternate_scan) {
       scan =  mpeg2_scan_norm_orig;
-      scan_ptable = mpeg2_scan_norm_ptable;
    } else {
       scan = mpeg2_scan_alt_orig;
-      scan_ptable = mpeg2_scan_alt_ptable;
    }
 
    vl_vlc_needbits(&bs->vlc);
@@ -1254,10 +1224,10 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
             break;	/* end of block */
 
       normal_code:
-         l = scan_ptable[j = scan[i]];
+         j = scan[i];
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[l]) >> 5;
+         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
 
          /* oddification */
          val = (val - 1) | 1;
@@ -1290,7 +1260,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
          if (i >= 64)
             break;	/* illegal, check needed to avoid buffer overflow */
 
-         l = scan_ptable[j = scan[i]];
+         j = scan[i];
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -1300,7 +1270,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
             val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
          }
          val = 2 * (val + SBITS (val, 1)) + 1;
-         val = (val * quantizer_scale * quant_matrix[l]) / 32;
+         val = (val * quantizer_scale * quant_matrix[j]) / 32;
 
          /* oddification */
          val = (val + ~SBITS (val, 1)) | 1;
@@ -1907,8 +1877,6 @@ vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height)
 
    bs->width = width;
    bs->height = height;
-
-   setup_scan_ptable();
 }
 
 void
-- 
cgit v1.2.3


From e3789105fe3a289338821a53da499857aa924637 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 4 May 2011 22:05:03 +0200
Subject: [g3dvl] divide mpg12 width height by 16

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 19 ++++++++-----------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |  4 +++-
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index da00f3730a1..142bcaba080 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1592,12 +1592,12 @@ do {							\
 
 #define NEXT_MACROBLOCK		                \
 do {				                \
-   bs->mv_stream[0][x+y*bs->width/16] = mv_fwd; \
-   bs->mv_stream[1][x+y*bs->width/16] = mv_bwd; \
+   bs->mv_stream[0][x+y*bs->width] = mv_fwd;    \
+   bs->mv_stream[1][x+y*bs->width] = mv_bwd;    \
    ++x;				                \
-   if (x == bs->width/16) {	                \
+   if (x == bs->width) {	                \
       ++y;                                      \
-      if (y >= bs->height/16)                   \
+      if (y >= bs->height)                      \
          return false;                          \
       x = 0;                                    \
    }                                            \
@@ -1613,7 +1613,7 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, in
       if(!vl_vlc_getbyte(&bs->vlc))
          return false;
    }
-   *y = ((bs->vlc.buf & 0xFF) - 1) * 16;
+   *y = (bs->vlc.buf & 0xFF) - 1;
    vl_vlc_restart(&bs->vlc);
 
    //TODO conversion to signed format signed format
@@ -1652,11 +1652,11 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, in
       }
    }
    vl_vlc_dumpbits(&bs->vlc, mba->len + 1);
-   *x = (*x + mba->mba) << 4;
+   *x += mba->mba;
 
    while (*x >= bs->width) {
       *x -= bs->width;
-      *y += 16;
+      (*y)++;
    }
    if (*y > bs->height)
       return false;
@@ -1680,9 +1680,6 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
    mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
    mv_bwd.top.field_select = mv_bwd.bottom.field_select = PIPE_VIDEO_FRAME;
 
-   x /= 16;
-   y /= 16;
-
    while (1) {
       int macroblock_modes;
       int mba_inc;
@@ -1897,7 +1894,7 @@ vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct pipe_ycbcr_block *ycbcr_s
       bs->mv_stream[i] = mv_stream[i];
 
    // TODO
-   for (i = 0; i < bs->width/16*bs->height/16; ++i) {
+   for (i = 0; i < bs->width*bs->height; ++i) {
       bs->mv_stream[0][i].top.x = bs->mv_stream[0][i].top.y = 0;
       bs->mv_stream[0][i].top.field_select = PIPE_VIDEO_FRAME;
       bs->mv_stream[0][i].top.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index b78844b9cfd..4ac3b90ad78 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -505,7 +505,9 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
       goto error_zscan;
 
    if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM)
-      vl_mpg12_bs_init(&buffer->bs, dec->base.width, dec->base.height);
+      vl_mpg12_bs_init(&buffer->bs,
+                       dec->base.width / MACROBLOCK_WIDTH,
+                       dec->base.height / MACROBLOCK_HEIGHT);
 
    return &buffer->base;
 
-- 
cgit v1.2.3


From a9b1c4fe2e67c5b158056a05cbc394d62c1d3e40 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 4 May 2011 22:16:03 +0200
Subject: [g3dvl] remove dc_dct_pred from picture structure

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 31 +++++++++++++-------------
 src/gallium/include/pipe/p_video_state.h       |  3 ---
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 142bcaba080..221ebdd391e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1313,7 +1313,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
 
 static inline void
 slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
-                unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding)
+                unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int dc_dct_pred[3])
 {
    short *dest = bs->ycbcr_buffer[cc];
 
@@ -1326,12 +1326,12 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
 
    /* Get the intra DC coefficient and inverse quantize it */
    if (cc == 0)
-      picture->dc_dct_pred[0] += get_luma_dc_dct_diff(bs);
+      dc_dct_pred[0] += get_luma_dc_dct_diff(bs);
    else
-      picture->dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs);
+      dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs);
 
    memset(dest, 0, sizeof(int16_t) * 64);
-   dest[0] = picture->dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
+   dest[0] = dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
    if (picture->mpeg1) {
       if (picture->picture_coding_type != D_TYPE)
           get_mpeg1_intra_block(bs, picture, dest);
@@ -1616,9 +1616,6 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, in
    *y = (bs->vlc.buf & 0xFF) - 1;
    vl_vlc_restart(&bs->vlc);
 
-   //TODO conversion to signed format signed format
-   picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = picture->dc_dct_pred[2] = 0;
-
    picture->quantizer_scale = get_quantizer_scale(bs, picture);
 
    /* ignore intra_slice and all the extra data */
@@ -1669,6 +1666,10 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
 {
    struct pipe_motionvector mv_fwd, mv_bwd;
    enum pipe_mpeg12_dct_type dct_type;
+
+   /* predictor for DC coefficients in intra blocks */
+   int dc_dct_pred[3] = { 0, 0, 0 };
+
    int x, y;
 
    if (!slice_init(bs, picture, &x, &y))
@@ -1729,12 +1730,12 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
          mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
 
          // unravaled loop of 6 block(i) calls in macroblock()
-         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type);
-         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type);
-         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type);
-         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type);
-         slice_intra_DCT(bs, picture, 1, x, y, dct_type);
-         slice_intra_DCT(bs, picture, 2, x, y, dct_type);
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 1, x, y, dct_type, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 2, x, y, dct_type, dc_dct_pred);
 
          if (picture->picture_coding_type == D_TYPE) {
             vl_vlc_needbits(&bs->vlc);
@@ -1805,7 +1806,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
                slice_non_intra_DCT(bs, picture, 2, x, y, dct_type); // cc2 croma
          }
 
-         picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = picture->dc_dct_pred[2] = 0;
+         dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
       }
 
       NEXT_MACROBLOCK;
@@ -1835,7 +1836,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
       mba_inc += mba->mba;
       if (mba_inc) {
          //TODO  conversion to signed format signed format
-         picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = picture->dc_dct_pred[2] = 0;
+         dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
 
          switch(picture->picture_structure) {
          case FRAME_PICTURE:
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 54fb1b7a595..828bebb56ec 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -122,9 +122,6 @@ struct pipe_mpeg12_picture_desc
    uint8_t *intra_quantizer_matrix;
    uint8_t *non_intra_quantizer_matrix;
 
-   /* predictor for DC coefficients in intra blocks */
-   int16_t dc_dct_pred[3];
-
    int quantizer_scale;
 
 #if 0
-- 
cgit v1.2.3


From 352bfb525ab4858ac1a5710cc8d629764cf6bd72 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 4 May 2011 22:30:16 +0200
Subject: [g3dvl] remove quantizer_scale from picture structure

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 70 +++++++++++++-------------
 src/gallium/include/pipe/p_video_state.h       |  2 -
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 221ebdd391e..aee804a5de1 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -748,12 +748,12 @@ get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
 }
 
 static inline void
-get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
+get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
+                    int quantizer_scale, short *dest)
 {
    int i, j, val;
    const uint8_t *scan;
    uint8_t *quant_matrix = picture->intra_quantizer_matrix;
-   int quantizer_scale = picture->quantizer_scale;
    int mismatch;
    const DCTtab *tab;
 
@@ -856,12 +856,12 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 }
 
 static inline void
-get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
+get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
+                    int quantizer_scale, short *dest)
 {
    int i, j, val;
    const uint8_t *scan;
    uint8_t *quant_matrix = picture->intra_quantizer_matrix;
-   int quantizer_scale = picture->quantizer_scale;
    int mismatch;
    const DCTtab * tab;
 
@@ -962,12 +962,12 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 }
 
 static inline void
-get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
+get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
+                    int quantizer_scale, short *dest)
 {
    int i, j, val;
    const uint8_t *scan;
    uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
-   int quantizer_scale = picture->quantizer_scale;
    int mismatch;
    const DCTtab *tab;
 
@@ -1078,12 +1078,12 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 }
 
 static inline void
-get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
+get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
+                      int quantizer_scale, short *dest)
 {
    int i, j, val;
    const uint8_t *scan;
    uint8_t *quant_matrix = picture->intra_quantizer_matrix;
-   int quantizer_scale = picture->quantizer_scale;
    const DCTtab * tab;
 
    i = 0;
@@ -1190,12 +1190,12 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
 }
 
 static inline void
-get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, short *dest)
+get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
+                          int quantizer_scale, short *dest)
 {
    int i, j, val;
    const uint8_t * scan;
    uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
-   int quantizer_scale = picture->quantizer_scale;
    const DCTtab * tab;
 
    i = -1;
@@ -1312,8 +1312,8 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
 }
 
 static inline void
-slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
-                unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int dc_dct_pred[3])
+slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc, unsigned x, unsigned y,
+                enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
 {
    short *dest = bs->ycbcr_buffer[cc];
 
@@ -1334,11 +1334,11 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
    dest[0] = dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
    if (picture->mpeg1) {
       if (picture->picture_coding_type != D_TYPE)
-          get_mpeg1_intra_block(bs, picture, dest);
+          get_mpeg1_intra_block(bs, picture, quantizer_scale, dest);
    } else if (picture->intra_vlc_format)
-      get_intra_block_B15(bs, picture, dest);
+      get_intra_block_B15(bs, picture, quantizer_scale, dest);
    else
-      get_intra_block_B14(bs, picture, dest);
+      get_intra_block_B14(bs, picture, quantizer_scale, dest);
 
    bs->num_ycbcr_blocks[cc]++;
    bs->ycbcr_stream[cc]++;
@@ -1347,7 +1347,7 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
 
 static inline void
 slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
-                    unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding)
+                    unsigned x, unsigned y, int quantizer_scale, enum pipe_mpeg12_dct_type coding)
 {
    short *dest = bs->ycbcr_buffer[cc];
 
@@ -1358,9 +1358,9 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
    memset(dest, 0, sizeof(int16_t) * 64);
    if (picture->mpeg1)
-      get_mpeg1_non_intra_block(bs, picture, dest);
+      get_mpeg1_non_intra_block(bs, picture, quantizer_scale, dest);
    else
-      get_non_intra_block(bs, picture, dest);
+      get_non_intra_block(bs, picture, quantizer_scale, dest);
 
    bs->num_ycbcr_blocks[cc]++;
    bs->ycbcr_stream[cc]++;
@@ -1604,7 +1604,8 @@ do {				                \
 } while (0)
 
 static inline bool
-slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int *x, int *y)
+slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
+           int *quantizer_scale, int *x, int *y)
 {
    const MBAtab * mba;
 
@@ -1616,7 +1617,7 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, in
    *y = (bs->vlc.buf & 0xFF) - 1;
    vl_vlc_restart(&bs->vlc);
 
-   picture->quantizer_scale = get_quantizer_scale(bs, picture);
+   *quantizer_scale = get_quantizer_scale(bs, picture);
 
    /* ignore intra_slice and all the extra data */
    while (bs->vlc.buf & 0x80000000) {
@@ -1669,10 +1670,11 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
 
    /* predictor for DC coefficients in intra blocks */
    int dc_dct_pred[3] = { 0, 0, 0 };
+   int quantizer_scale;
 
    int x, y;
 
-   if (!slice_init(bs, picture, &x, &y))
+   if (!slice_init(bs, picture, &quantizer_scale, &x, &y))
       return false;
 
    mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
@@ -1712,7 +1714,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
 
       /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
       if (macroblock_modes & MACROBLOCK_QUANT)
-         picture->quantizer_scale = get_quantizer_scale(bs, picture);
+         quantizer_scale = get_quantizer_scale(bs, picture);
 
       if (macroblock_modes & MACROBLOCK_INTRA) {
 
@@ -1730,12 +1732,12 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
          mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
 
          // unravaled loop of 6 block(i) calls in macroblock()
-         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 1, x, y, dct_type, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 2, x, y, dct_type, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 1, x, y, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 2, x, y, dct_type, quantizer_scale, dc_dct_pred);
 
          if (picture->picture_coding_type == D_TYPE) {
             vl_vlc_needbits(&bs->vlc);
@@ -1793,17 +1795,17 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
 
             // TODO  optimize not fully used for idct accel only mc.
             if (coded_block_pattern & 0x20)
-               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type); // cc0  luma 0
+               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, quantizer_scale, dct_type); // cc0  luma 0
             if (coded_block_pattern & 0x10)
-               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type); // cc0 luma 1
+               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, quantizer_scale, dct_type); // cc0 luma 1
             if (coded_block_pattern & 0x08)
-               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type); // cc0 luma 2
+               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, quantizer_scale, dct_type); // cc0 luma 2
             if (coded_block_pattern & 0x04)
-               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type); // cc0 luma 3
+               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, quantizer_scale, dct_type); // cc0 luma 3
             if (coded_block_pattern & 0x2)
-               slice_non_intra_DCT(bs, picture, 1, x, y, dct_type); // cc1 croma
+               slice_non_intra_DCT(bs, picture, 1, x, y, quantizer_scale, dct_type); // cc1 croma
             if (coded_block_pattern & 0x1)
-               slice_non_intra_DCT(bs, picture, 2, x, y, dct_type); // cc2 croma
+               slice_non_intra_DCT(bs, picture, 2, x, y, quantizer_scale, dct_type); // cc2 croma
          }
 
          dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 828bebb56ec..f46c2dd2bbf 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -122,8 +122,6 @@ struct pipe_mpeg12_picture_desc
    uint8_t *intra_quantizer_matrix;
    uint8_t *non_intra_quantizer_matrix;
 
-   int quantizer_scale;
-
 #if 0
    /* TODO: Use bitfields where possible? */
    unsigned top_field_first;
-- 
cgit v1.2.3


From 6ad846ee78d9d8ba93dcecdefbf89f2b981333ef Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 5 May 2011 20:09:34 +0200
Subject: [g3dvl] move zscan into shaders

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 137 ++++++++-----------------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |  27 +++--
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h   |   4 +
 src/gallium/auxiliary/vl/vl_zscan.c            |  72 ++++++++++---
 src/gallium/auxiliary/vl/vl_zscan.h            |  21 ++--
 5 files changed, 131 insertions(+), 130 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index aee804a5de1..36eed519092 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -55,6 +55,7 @@
 #include <pipe/p_video_state.h>
 
 #include "vl_vlc.h"
+#include "vl_zscan.h"
 #include "vl_mpeg12_bitstream.h"
 
 /* take num bits from the high part of bit_buf and zero extend them */
@@ -457,29 +458,6 @@ static const MBAtab MBA_11 [] = {
    { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
 };
 
-/* original (non-patched) scan tables */
-static const uint8_t mpeg2_scan_norm_orig[64] =
-{
-   /* Zig-Zag scan pattern */
-    0, 1, 8,16, 9, 2, 3,10,
-   17,24,32,25,18,11, 4, 5,
-   12,19,26,33,40,48,41,34,
-   27,20,13, 6, 7,14,21,28,
-   35,42,49,56,57,50,43,36,
-   29,22,15,23,30,37,44,51,
-   58,59,52,45,38,31,39,46,
-   53,60,61,54,47,55,62,63
-};
-
-static const uint8_t mpeg2_scan_alt_orig[64] =
-{
-   /* Alternate scan pattern */
-   0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49,
-   41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43,
-   51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45,
-   53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63
-};
-
 static const int non_linear_quantizer_scale[] = {
    0,  1,  2,  3,  4,  5,   6,   7,
    8, 10, 12, 14, 16, 18,  20,  22,
@@ -749,20 +727,13 @@ get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
 
 static inline void
 get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                    int quantizer_scale, short *dest)
+                    const int scan[64], int quantizer_scale, short *dest)
 {
    int i, j, val;
-   const uint8_t *scan;
    uint8_t *quant_matrix = picture->intra_quantizer_matrix;
    int mismatch;
    const DCTtab *tab;
 
-   if (!picture->alternate_scan) {
-      scan =  mpeg2_scan_norm_orig;
-   } else {
-      scan = mpeg2_scan_alt_orig;
-   }
-
    i = 0;
    mismatch = ~dest[0];
 
@@ -788,7 +759,7 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
          SATURATE (val);
-         dest[j] = val;
+         dest[i] = val;
          mismatch ^= val;
 
          bs->vlc.buf <<= 1;
@@ -817,7 +788,7 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[j]) / 16;
 
          SATURATE (val);
-         dest[j] = val;
+         dest[i] = val;
          mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
@@ -857,20 +828,13 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
 static inline void
 get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                    int quantizer_scale, short *dest)
+                    const int scan[64], int quantizer_scale, short *dest)
 {
    int i, j, val;
-   const uint8_t *scan;
    uint8_t *quant_matrix = picture->intra_quantizer_matrix;
    int mismatch;
    const DCTtab * tab;
 
-   if (!picture->alternate_scan) {
-      scan =  mpeg2_scan_norm_orig;
-   } else {
-      scan = mpeg2_scan_alt_orig;
-   }
-
    i = 0;
    mismatch = ~dest[0];
 
@@ -894,7 +858,7 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
             SATURATE (val);
-            dest[j] = val;
+            dest[i] = val;
             mismatch ^= val;
 
             bs->vlc.buf <<= 1;
@@ -922,7 +886,7 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[j]) / 16;
 
             SATURATE (val);
-            dest[j] = val;
+            dest[i] = val;
             mismatch ^= val;
 
             vl_vlc_dumpbits(&bs->vlc, 12);
@@ -963,10 +927,9 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
 static inline void
 get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                    int quantizer_scale, short *dest)
+                    const int scan[64], int quantizer_scale, short *dest)
 {
    int i, j, val;
-   const uint8_t *scan;
    uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
    int mismatch;
    const DCTtab *tab;
@@ -974,12 +937,6 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
    i = -1;
    mismatch = 1;
 
-   if (!picture->alternate_scan) {
-      scan =  mpeg2_scan_norm_orig;
-   } else {
-      scan = mpeg2_scan_alt_orig;
-   }
-
    vl_vlc_needbits(&bs->vlc);
    if (bs->vlc.buf >= 0x28000000) {
       tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
@@ -1007,7 +964,7 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
          SATURATE (val);
-         dest[j] = val;
+         dest[i] = val;
          mismatch ^= val;
 
          bs->vlc.buf <<= 1;
@@ -1040,7 +997,7 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          val = (val * quantizer_scale * quant_matrix[j]) / 32;
 
          SATURATE (val);
-         dest[j] = val;
+         dest[i] = val;
          mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
@@ -1079,21 +1036,14 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
 static inline void
 get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                      int quantizer_scale, short *dest)
+                      const int scan[64], int quantizer_scale, short *dest)
 {
    int i, j, val;
-   const uint8_t *scan;
    uint8_t *quant_matrix = picture->intra_quantizer_matrix;
    const DCTtab * tab;
 
    i = 0;
 
-   if (!picture->alternate_scan) {
-      scan =  mpeg2_scan_norm_orig;
-   } else {
-      scan = mpeg2_scan_alt_orig;
-   }
-
    vl_vlc_needbits(&bs->vlc);
 
    while (1) {
@@ -1118,7 +1068,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
          SATURATE (val);
-         dest[j] = val;
+         dest[i] = val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -1154,7 +1104,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
          val = (val + ~SBITS (val, 1)) | 1;
 
          SATURATE (val);
-         dest[j] = val;
+         dest[i] = val;
 
          vl_vlc_dumpbits(&bs->vlc, 8);
          vl_vlc_needbits(&bs->vlc);
@@ -1191,21 +1141,14 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
 
 static inline void
 get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                          int quantizer_scale, short *dest)
+                          const int scan[64], int quantizer_scale, short *dest)
 {
    int i, j, val;
-   const uint8_t * scan;
    uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
    const DCTtab * tab;
 
    i = -1;
 
-   if (!picture->alternate_scan) {
-      scan =  mpeg2_scan_norm_orig;
-   } else {
-      scan = mpeg2_scan_alt_orig;
-   }
-
    vl_vlc_needbits(&bs->vlc);
    if (bs->vlc.buf >= 0x28000000) {
       tab = DCT_B14DC_5 + (vl_vlc_ubits(&bs->vlc, 5) - 5);
@@ -1236,7 +1179,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
          SATURATE (val);
-         dest[j] = val;
+         dest[i] = val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -1276,7 +1219,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
          val = (val + ~SBITS (val, 1)) | 1;
 
          SATURATE (val);
-         dest[j] = val;
+         dest[i] = val;
 
          vl_vlc_dumpbits(&bs->vlc, 8);
          vl_vlc_needbits(&bs->vlc);
@@ -1312,8 +1255,8 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
 }
 
 static inline void
-slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc, unsigned x, unsigned y,
-                enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
+slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int scan[64], int cc,
+                 unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
 {
    short *dest = bs->ycbcr_buffer[cc];
 
@@ -1334,11 +1277,11 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
    dest[0] = dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
    if (picture->mpeg1) {
       if (picture->picture_coding_type != D_TYPE)
-          get_mpeg1_intra_block(bs, picture, quantizer_scale, dest);
+          get_mpeg1_intra_block(bs, picture, scan, quantizer_scale, dest);
    } else if (picture->intra_vlc_format)
-      get_intra_block_B15(bs, picture, quantizer_scale, dest);
+      get_intra_block_B15(bs, picture, scan, quantizer_scale, dest);
    else
-      get_intra_block_B14(bs, picture, quantizer_scale, dest);
+      get_intra_block_B14(bs, picture, scan, quantizer_scale, dest);
 
    bs->num_ycbcr_blocks[cc]++;
    bs->ycbcr_stream[cc]++;
@@ -1346,7 +1289,7 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
 }
 
 static inline void
-slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
+slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int scan[64], int cc,
                     unsigned x, unsigned y, int quantizer_scale, enum pipe_mpeg12_dct_type coding)
 {
    short *dest = bs->ycbcr_buffer[cc];
@@ -1358,9 +1301,9 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
    memset(dest, 0, sizeof(int16_t) * 64);
    if (picture->mpeg1)
-      get_mpeg1_non_intra_block(bs, picture, quantizer_scale, dest);
+      get_mpeg1_non_intra_block(bs, picture, scan, quantizer_scale, dest);
    else
-      get_non_intra_block(bs, picture, quantizer_scale, dest);
+      get_non_intra_block(bs, picture, scan, quantizer_scale, dest);
 
    bs->num_ycbcr_blocks[cc]++;
    bs->ycbcr_stream[cc]++;
@@ -1663,7 +1606,7 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
 }
 
 static inline bool
-decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
+decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, const int scan[64])
 {
    struct pipe_motionvector mv_fwd, mv_bwd;
    enum pipe_mpeg12_dct_type dct_type;
@@ -1732,12 +1675,12 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
          mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
 
          // unravaled loop of 6 block(i) calls in macroblock()
-         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 1, x, y, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, 2, x, y, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, scan, 1, x, y, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, scan, 2, x, y, dct_type, quantizer_scale, dc_dct_pred);
 
          if (picture->picture_coding_type == D_TYPE) {
             vl_vlc_needbits(&bs->vlc);
@@ -1795,17 +1738,17 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
 
             // TODO  optimize not fully used for idct accel only mc.
             if (coded_block_pattern & 0x20)
-               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, quantizer_scale, dct_type); // cc0  luma 0
+               slice_non_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+0, quantizer_scale, dct_type); // cc0  luma 0
             if (coded_block_pattern & 0x10)
-               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, quantizer_scale, dct_type); // cc0 luma 1
+               slice_non_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+0, quantizer_scale, dct_type); // cc0 luma 1
             if (coded_block_pattern & 0x08)
-               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, quantizer_scale, dct_type); // cc0 luma 2
+               slice_non_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+1, quantizer_scale, dct_type); // cc0 luma 2
             if (coded_block_pattern & 0x04)
-               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, quantizer_scale, dct_type); // cc0 luma 3
+               slice_non_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+1, quantizer_scale, dct_type); // cc0 luma 3
             if (coded_block_pattern & 0x2)
-               slice_non_intra_DCT(bs, picture, 1, x, y, quantizer_scale, dct_type); // cc1 croma
+               slice_non_intra_DCT(bs, picture, scan, 1, x, y, quantizer_scale, dct_type); // cc1 croma
             if (coded_block_pattern & 0x1)
-               slice_non_intra_DCT(bs, picture, 2, x, y, quantizer_scale, dct_type); // cc2 croma
+               slice_non_intra_DCT(bs, picture, scan, 2, x, y, quantizer_scale, dct_type); // cc2 croma
          }
 
          dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
@@ -1918,6 +1861,8 @@ void
 vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
                    struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3])
 {
+   const int *scan;
+
    assert(bs);
    assert(num_ycbcr_blocks);
    assert(buffer && num_bytes);
@@ -1926,5 +1871,7 @@ vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffe
 
    vl_vlc_init(&bs->vlc, buffer, num_bytes);
 
-   while(decode_slice(bs, picture));
+   scan = picture->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
+
+   while(decode_slice(bs, picture, scan));
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 4ac3b90ad78..b7d9b4706a5 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -358,6 +358,9 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
          mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i);
 
       vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
+   } else {
+      for (i = 0; i < VL_MAX_PLANES; ++i)
+         vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
    }
 }
 
@@ -409,6 +412,16 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
                                   unsigned num_ycbcr_blocks[3])
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   struct vl_mpeg12_decoder *dec;
+   unsigned i;
+
+   assert(buf);
+
+   dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
+   assert(dec);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      vl_zscan_set_layout(&buf->zscan[i], picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal);
 
    vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks);
 }
@@ -464,6 +477,10 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    pipe_resource_reference(&dec->quads.buffer, NULL);
    pipe_resource_reference(&dec->pos.buffer, NULL);
 
+   pipe_sampler_view_reference(&dec->zscan_linear, NULL);
+   pipe_sampler_view_reference(&dec->zscan_normal, NULL);
+   pipe_sampler_view_reference(&dec->zscan_alternate, NULL);
+
    FREE(dec);
 }
 
@@ -676,8 +693,6 @@ find_first_supported_format(struct vl_mpeg12_decoder *dec,
 static bool
 init_zscan(struct vl_mpeg12_decoder *dec)
 {
-   struct pipe_sampler_view *layout;
-
    unsigned num_channels;
 
    assert(dec);
@@ -693,7 +708,9 @@ init_zscan(struct vl_mpeg12_decoder *dec)
    if (dec->zscan_source_format == PIPE_FORMAT_NONE)
       return false;
 
-   layout = vl_zscan_linear(dec->pipe, dec->blocks_per_line);
+   dec->zscan_linear = vl_zscan_layout(dec->pipe, vl_zscan_linear, dec->blocks_per_line);
+   dec->zscan_normal = vl_zscan_layout(dec->pipe, vl_zscan_normal, dec->blocks_per_line);
+   dec->zscan_alternate = vl_zscan_layout(dec->pipe, vl_zscan_alternate, dec->blocks_per_line);
 
    num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1;
 
@@ -701,14 +718,10 @@ init_zscan(struct vl_mpeg12_decoder *dec)
                       dec->blocks_per_line, dec->max_blocks, num_channels))
       return false;
 
-   vl_zscan_set_layout(&dec->zscan_y, layout);
-
    if (!vl_zscan_init(&dec->zscan_c, dec->pipe, dec->chroma_width, dec->chroma_height,
                       dec->blocks_per_line, dec->max_blocks, num_channels))
       return false;
 
-   vl_zscan_set_layout(&dec->zscan_c, layout);
-
    return true;
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 66356694b59..0ea71d11ca7 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -64,6 +64,10 @@ struct vl_mpeg12_decoder
 
    void *sampler_ycbcr;
 
+   struct pipe_sampler_view *zscan_linear;
+   struct pipe_sampler_view *zscan_normal;
+   struct pipe_sampler_view *zscan_alternate;
+
    struct vl_zscan zscan_y, zscan_c;
    struct vl_idct idct_y, idct_c;
    struct vl_mc mc_y, mc_c;
diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
index 4d4d3fd6d95..d06c567bfb4 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -48,6 +48,45 @@ enum VS_OUTPUT
    VS_O_VTEX
 };
 
+const int vl_zscan_linear[] =
+{
+   /* Linear scan pattern */
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8, 9,10,11,12,13,14,15,
+   16,17,18,19,20,21,22,23,
+   24,25,26,27,28,29,30,31,
+   32,33,34,35,36,37,38,39,
+   40,41,42,43,44,45,46,47,
+   48,49,50,51,52,53,54,55,
+   56,57,58,59,60,61,62,63
+};
+
+const int vl_zscan_normal[] =
+{
+   /* Zig-Zag scan pattern */
+    0, 1, 8,16, 9, 2, 3,10,
+   17,24,32,25,18,11, 4, 5,
+   12,19,26,33,40,48,41,34,
+   27,20,13, 6, 7,14,21,28,
+   35,42,49,56,57,50,43,36,
+   29,22,15,23,30,37,44,51,
+   58,59,52,45,38,31,39,46,
+   53,60,61,54,47,55,62,63
+};
+
+const int vl_zscan_alternate[] =
+{
+   /* Alternate scan pattern */
+    0, 8,16,24, 1, 9, 2,10,
+   17,25,32,40,48,56,57,49,
+   41,33,26,18, 3,11, 4,12,
+   19,27,34,42,50,58,35,43,
+   51,59,20,28, 5,13, 6,14,
+   21,29,36,44,52,60,37,45,
+   53,61,22,30, 7,15,23,31,
+   38,46,54,62,39,47,55,63
+};
+
 static void *
 create_vert_shader(struct vl_zscan *zscan)
 {
@@ -288,10 +327,12 @@ cleanup_state(struct vl_zscan *zscan)
 }
 
 struct pipe_sampler_view *
-vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line)
+vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line)
 {
    const unsigned total_size = blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT;
 
+   int patched_layout[64];
+
    struct pipe_resource res_tmpl, *res;
    struct pipe_sampler_view sv_tmpl, *sv;
    struct pipe_transfer *buf_transfer;
@@ -306,7 +347,10 @@ vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line)
       1
    };
 
-   assert(pipe && blocks_per_line);
+   assert(pipe && layout && blocks_per_line);
+
+   for (i = 0; i < 64; ++i)
+      patched_layout[layout[i]] = i;
 
    memset(&res_tmpl, 0, sizeof(res_tmpl));
    res_tmpl.target = PIPE_TEXTURE_2D;
@@ -340,7 +384,7 @@ vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line)
    for (i = 0; i < blocks_per_line; ++i)
       for (y = 0; y < BLOCK_HEIGHT; ++y)
          for (x = 0; x < BLOCK_WIDTH; ++x) {
-            float addr = x + y * BLOCK_WIDTH +
+            float addr = patched_layout[x + y * BLOCK_WIDTH] +
                i * BLOCK_WIDTH * BLOCK_HEIGHT;
 
             addr /= total_size;
@@ -414,15 +458,6 @@ vl_zscan_cleanup(struct vl_zscan *zscan)
    cleanup_state(zscan);
 }
 
-void
-vl_zscan_set_layout(struct vl_zscan *zscan, struct pipe_sampler_view *layout)
-{
-   assert(zscan);
-   assert(layout);
-
-   pipe_sampler_view_reference(&zscan->scan, layout);
-}
-
 #if 0
 // TODO
 void
@@ -440,8 +475,6 @@ vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
    buffer->zscan = zscan;
 
    pipe_sampler_view_reference(&buffer->src, src);
-   pipe_sampler_view_reference(&buffer->scan, zscan->scan);
-   pipe_sampler_view_reference(&buffer->quant, zscan->quant);
 
    buffer->viewport.scale[0] = dst->width;
    buffer->viewport.scale[1] = dst->height;
@@ -466,11 +499,20 @@ vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer)
    assert(buffer);
 
    pipe_sampler_view_reference(&buffer->src, NULL);
-   pipe_sampler_view_reference(&buffer->scan, NULL);
+   pipe_sampler_view_reference(&buffer->layout, NULL);
    pipe_sampler_view_reference(&buffer->quant, NULL);
    pipe_surface_reference(&buffer->fb_state.cbufs[0], NULL);
 }
 
+void
+vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout)
+{
+   assert(buffer);
+   assert(layout);
+
+   pipe_sampler_view_reference(&buffer->layout, layout);
+}
+
 void
 vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
 {
diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h
index 28b990ca83b..ccc6bc4a8a1 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.h
+++ b/src/gallium/auxiliary/vl/vl_zscan.h
@@ -54,7 +54,6 @@ struct vl_zscan
 
    void *vs, *fs;
 
-   struct pipe_sampler_view *scan;
    struct pipe_sampler_view *quant;
 };
 
@@ -65,20 +64,16 @@ struct vl_zscan_buffer
    struct pipe_viewport_state viewport;
    struct pipe_framebuffer_state fb_state;
 
-   struct pipe_sampler_view *src, *scan, *quant;
+   struct pipe_sampler_view *src, *layout, *quant;
    struct pipe_surface *dst;
 };
 
-struct pipe_sampler_view *
-vl_zscan_linear(struct pipe_context *pipe, unsigned blocks_per_line);
+extern const int vl_zscan_linear[];
+extern const int vl_zscan_normal[];
+extern const int vl_zscan_alternate[];
 
-#if 0
 struct pipe_sampler_view *
-vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);
-
-struct pipe_sampler_view *
-vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
-#endif
+vl_zscan_layout(struct pipe_context *pipe, const int layout[64], unsigned blocks_per_line);
 
 bool
 vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
@@ -86,9 +81,6 @@ vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
               unsigned blocks_per_line, unsigned blocks_total,
               unsigned num_channels);
 
-void
-vl_zscan_set_layout(struct vl_zscan *zscan, struct pipe_sampler_view *layout);
-
 void
 vl_zscan_cleanup(struct vl_zscan *zscan);
 
@@ -104,6 +96,9 @@ vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
 void
 vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer);
 
+void
+vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout);
+
 void
 vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances);
 
-- 
cgit v1.2.3


From a67a0bb60c9f17608fe78467263ef7dbc5b49b0a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 7 May 2011 14:39:35 +0200
Subject: [g3dvl] using reciprocals for size calc is sometimes not precise
 enough

---
 src/gallium/auxiliary/vl/vl_compositor.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index cc257e5614f..74512c1d40a 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -378,16 +378,16 @@ default_rect(struct vl_compositor_layer *layer)
 }
 
 static inline struct vertex2f
-calc_topleft(struct vertex2f inv_size, struct pipe_video_rect rect)
+calc_topleft(struct vertex2f size, struct pipe_video_rect rect)
 {
-   struct vertex2f res = { rect.x * inv_size.x, rect.y * inv_size.y };
+   struct vertex2f res = { rect.x / size.x, rect.y / size.y };
    return res;
 }
 
 static inline struct vertex2f
-calc_bottomright(struct vertex2f inv_size, struct pipe_video_rect rect)
+calc_bottomright(struct vertex2f size, struct pipe_video_rect rect)
 {
-   struct vertex2f res = { (rect.x + rect.w) * inv_size.x, (rect.y + rect.h) * inv_size.y };
+   struct vertex2f res = { (rect.x + rect.w) / size.x, (rect.y + rect.h) / size.y };
    return res;
 }
 
@@ -395,12 +395,12 @@ static inline void
 calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned height,
                  struct pipe_video_rect src, struct pipe_video_rect dst)
 {
-   struct vertex2f inv_size =  { 1.0f / width, 1.0f / height };
+   struct vertex2f size =  { width, height };
 
-   layer->src.tl = calc_topleft(inv_size, src);
-   layer->src.br = calc_bottomright(inv_size, src);
-   layer->dst.tl = calc_topleft(inv_size, dst);
-   layer->dst.br = calc_bottomright(inv_size, dst);
+   layer->src.tl = calc_topleft(size, src);
+   layer->src.br = calc_bottomright(size, src);
+   layer->dst.tl = calc_topleft(size, dst);
+   layer->dst.br = calc_bottomright(size, dst);
 }
 
 static void
-- 
cgit v1.2.3


From 6a5d2d7967074be2ea87c06ee1e9af3ac34a5c99 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 7 May 2011 14:49:58 +0200
Subject: [g3dvl] rgba surfaces are clearing if the alpha channel is one

---
 src/gallium/auxiliary/vl/vl_compositor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 74512c1d40a..83c93637219 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -645,7 +645,7 @@ vl_compositor_set_rgba_layer(struct pipe_video_compositor *compositor,
    assert(layer < VL_COMPOSITOR_MAX_LAYERS);
 
    c->used_layers |= 1 << layer;
-   c->layers[layer].clearing = false;
+   c->layers[layer].clearing = rgba->swizzle_a == PIPE_SWIZZLE_ONE;
    c->layers[layer].fs = c->fs_rgba;
    c->layers[layer].samplers[0] = c->sampler_linear;
    c->layers[layer].samplers[1] = NULL;
-- 
cgit v1.2.3


From e0cc970a54660035942ef8f8db122835e1407676 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 7 May 2011 14:51:06 +0200
Subject: vdpau: as long as we don't have a background picture we don't want an
 alpha channel

---
 src/gallium/state_trackers/vdpau/output.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index 0257d38017a..72e63fc2f32 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -81,6 +81,10 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
 
    memset(&sv_templ, 0, sizeof(sv_templ));
    u_sampler_view_default_template(&sv_templ, res, res->format);
+
+   // as long as we don't have a background picture we don't want an alpha channel
+   sv_templ.swizzle_a = PIPE_SWIZZLE_ONE;
+
    vlsurface->sampler_view = context->create_sampler_view(context, res, &sv_templ);
    if (!vlsurface->sampler_view) {
       FREE(dev);
-- 
cgit v1.2.3


From cd13ec253ab8befed6ee9ece20ceebed9f73050f Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 7 May 2011 15:00:52 +0200
Subject: vdpau: implement PresentationQueueSetBackgroundColor

---
 src/gallium/state_trackers/vdpau/presentation.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 2f029f07fb1..02fcfbd0746 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -109,10 +109,20 @@ VdpStatus
 vlVdpPresentationQueueSetBackgroundColor(VdpPresentationQueue presentation_queue,
                                          VdpColor *const background_color)
 {
+   vlVdpPresentationQueue *pq;
+
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Setting Background Color\n");
+
    if (!background_color)
       return VDP_STATUS_INVALID_POINTER;
 
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   pq = vlGetDataHTAB(presentation_queue);
+   if (!pq)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   pq->compositor->set_clear_color(pq->compositor, (float*)background_color);
+
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
-- 
cgit v1.2.3


From e06a09ac37c1ab55326922abc314fe84e3ac5824 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 7 May 2011 15:28:04 +0200
Subject: vdpau: use multiple buffers to speed up rendering

---
 src/gallium/state_trackers/vdpau/decode.c        | 32 ++++++++++++++++++------
 src/gallium/state_trackers/vdpau/vdpau_private.h |  4 ++-
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 3a366d35a93..0f658a92a11 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -45,6 +45,7 @@ vlVdpDecoderCreate(VdpDevice device,
    vlVdpDevice *dev;
    vlVdpDecoder *vldecoder;
    VdpStatus ret;
+   unsigned i;
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Creating decoder\n");
 
@@ -83,10 +84,14 @@ vlVdpDecoderCreate(VdpDevice device,
       goto error_decoder;
    }
 
-   vldecoder->buffer = vldecoder->decoder->create_buffer(vldecoder->decoder);
-   if (!vldecoder->buffer) {
-      ret = VDP_STATUS_ERROR;
-      goto error_buffer;
+   vldecoder->cur_buffer = 0;
+
+   for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i) {
+      vldecoder->buffer[i] = vldecoder->decoder->create_buffer(vldecoder->decoder);
+      if (!vldecoder->buffer[i]) {
+         ret = VDP_STATUS_ERROR;
+         goto error_buffer;
+      }
    }
 
    *decoder = vlAddDataHTAB(vldecoder);
@@ -100,9 +105,12 @@ vlVdpDecoderCreate(VdpDevice device,
    return VDP_STATUS_OK;
 
 error_handle:
-   vldecoder->buffer->destroy(vldecoder->buffer);
-
 error_buffer:
+
+   for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i)
+      if (vldecoder->buffer[i])
+         vldecoder->buffer[i]->destroy(vldecoder->buffer[i]);
+
    vldecoder->decoder->destroy(vldecoder->decoder);
 
 error_decoder:
@@ -114,6 +122,7 @@ VdpStatus
 vlVdpDecoderDestroy(VdpDecoder decoder)
 {
    vlVdpDecoder *vldecoder;
+   unsigned i;
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Destroying decoder\n");
 
@@ -121,7 +130,10 @@ vlVdpDecoderDestroy(VdpDecoder decoder)
    if (!vldecoder)
       return VDP_STATUS_INVALID_HANDLE;
 
-   vldecoder->buffer->destroy(vldecoder->buffer);
+   for (i = 0; i < VL_NUM_DECODE_BUFFERS; ++i)
+      if (vldecoder->buffer[i])
+         vldecoder->buffer[i]->destroy(vldecoder->buffer[i]);
+
    vldecoder->decoder->destroy(vldecoder->decoder);
 
    FREE(vldecoder);
@@ -234,7 +246,11 @@ vlVdpDecoderRender(VdpDecoder decoder,
    switch (vldecoder->decoder->profile)   {
    case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
    case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
-      return vlVdpDecoderRenderMpeg2(vldecoder->decoder, vldecoder->buffer, vlsurf->video_buffer,
+      ++vldecoder->cur_buffer;
+      vldecoder->cur_buffer %= VL_NUM_DECODE_BUFFERS;
+      return vlVdpDecoderRenderMpeg2(vldecoder->decoder,
+                                     vldecoder->buffer[vldecoder->cur_buffer],
+                                     vlsurf->video_buffer,
                                      (VdpPictureInfoMPEG1Or2 *)picture_info,
                                      bitstream_buffer_count,bitstream_buffers);
       break;
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index cbc51d57e36..1d6ca39fe39 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -45,6 +45,7 @@
 #define TOSTRING(x) QUOTEME(x)
 #define INFORMATION_STRING TOSTRING(INFORMATION)
 #define VL_HANDLES
+#define VL_NUM_DECODE_BUFFERS 4
 
 static inline enum pipe_video_chroma_format
 ChromaToPipe(VdpChromaType vdpau_type)
@@ -213,7 +214,8 @@ typedef struct
 {
    vlVdpDevice *device;
    struct pipe_video_decoder *decoder;
-   struct pipe_video_decode_buffer *buffer;
+   struct pipe_video_decode_buffer *buffer[VL_NUM_DECODE_BUFFERS];
+   unsigned cur_buffer;
 } vlVdpDecoder;
 
 typedef uint32_t vlHandle;
-- 
cgit v1.2.3


From 626352648a8c0b5d92a979d1b1adbf472e711ff8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 7 May 2011 18:09:31 +0200
Subject: [g3dvl] memcpy the DCT buffer instead of writing it directly

It looks like texture buffers are not cached, so this seems to be a lot faster
---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 36eed519092..9401081307a 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1258,7 +1258,7 @@ static inline void
 slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int scan[64], int cc,
                  unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
 {
-   short *dest = bs->ycbcr_buffer[cc];
+   short dest[64];
 
    bs->ycbcr_stream[cc]->x = x;
    bs->ycbcr_stream[cc]->y = y;
@@ -1283,6 +1283,8 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
    else
       get_intra_block_B14(bs, picture, scan, quantizer_scale, dest);
 
+   memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
+
    bs->num_ycbcr_blocks[cc]++;
    bs->ycbcr_stream[cc]++;
    bs->ycbcr_buffer[cc] += 64;
@@ -1292,7 +1294,7 @@ static inline void
 slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int scan[64], int cc,
                     unsigned x, unsigned y, int quantizer_scale, enum pipe_mpeg12_dct_type coding)
 {
-   short *dest = bs->ycbcr_buffer[cc];
+   short dest[64];
 
    bs->ycbcr_stream[cc]->x = x;
    bs->ycbcr_stream[cc]->y = y;
@@ -1305,6 +1307,8 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
    else
       get_non_intra_block(bs, picture, scan, quantizer_scale, dest);
 
+   memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
+
    bs->num_ycbcr_blocks[cc]++;
    bs->ycbcr_stream[cc]++;
    bs->ycbcr_buffer[cc] += 64;
-- 
cgit v1.2.3


From 3b773d06d2edd39ce6e6ab6e306e3cca121dddfc Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 8 May 2011 13:39:56 +0200
Subject: [g3dvl] fix field selection of mb without mc

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 41 +++++++++++++++-----------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 9401081307a..02bf90316ca 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1612,6 +1612,7 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
 static inline bool
 decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, const int scan[64])
 {
+   enum pipe_video_field_select default_field_select;
    struct pipe_motionvector mv_fwd, mv_bwd;
    enum pipe_mpeg12_dct_type dct_type;
 
@@ -1621,14 +1622,28 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
 
    int x, y;
 
+   switch(picture->picture_structure) {
+   case TOP_FIELD:
+      default_field_select = PIPE_VIDEO_TOP_FIELD;
+      break;
+
+   case BOTTOM_FIELD:
+      default_field_select = PIPE_VIDEO_BOTTOM_FIELD;
+      break;
+
+   default:
+      default_field_select = PIPE_VIDEO_FRAME;
+      break;
+   }
+
    if (!slice_init(bs, picture, &quantizer_scale, &x, &y))
       return false;
 
    mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-   mv_fwd.top.field_select = mv_fwd.bottom.field_select = PIPE_VIDEO_FRAME;
+   mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
 
    mv_bwd.top.x = mv_bwd.top.y = mv_bwd.bottom.x = mv_bwd.bottom.y = 0;
-   mv_bwd.top.field_select = mv_bwd.bottom.field_select = PIPE_VIDEO_FRAME;
+   mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
 
    while (1) {
       int macroblock_modes;
@@ -1648,6 +1663,10 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
          break;
 
       default:
+         mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
+         mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
+
+         /* fall through */
       case MACROBLOCK_MOTION_FORWARD:
          mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
          mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
@@ -1787,22 +1806,8 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
          //TODO  conversion to signed format signed format
          dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
 
-         switch(picture->picture_structure) {
-         case FRAME_PICTURE:
-            mv_fwd.top.field_select = mv_fwd.bottom.field_select = PIPE_VIDEO_FRAME;
-            mv_bwd.top.field_select = mv_bwd.bottom.field_select = PIPE_VIDEO_FRAME;
-            break;
-
-         case TOP_FIELD:
-            mv_fwd.top.field_select = mv_fwd.bottom.field_select = PIPE_VIDEO_TOP_FIELD;
-            mv_bwd.top.field_select = mv_bwd.bottom.field_select = PIPE_VIDEO_TOP_FIELD;
-            break;
-
-         case BOTTOM_FIELD:
-            mv_fwd.top.field_select = mv_fwd.bottom.field_select = PIPE_VIDEO_BOTTOM_FIELD;
-            mv_bwd.top.field_select = mv_bwd.bottom.field_select = PIPE_VIDEO_BOTTOM_FIELD;
-            break;
-         }
+         mv_fwd.top.field_select = mv_fwd.bottom.field_select = default_field_select;
+         mv_bwd.top.field_select = mv_bwd.bottom.field_select = default_field_select;
 
          if (picture->picture_coding_type == P_TYPE) {
             mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
-- 
cgit v1.2.3


From ce31aaec02de0ef509dcee9d641c39623017a638 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 8 May 2011 22:28:33 +0200
Subject: [g3dvl] move dct_type detection out of get_macroblock_modes

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 52 ++++++++++++--------------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 02bf90316ca..7723d487d84 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -76,7 +76,6 @@ do {					\
 #define MACROBLOCK_MOTION_BACKWARD 4
 #define MACROBLOCK_MOTION_FORWARD 8
 #define MACROBLOCK_QUANT 16
-#define DCT_TYPE_INTERLACED 32
 
 /* motion_type */
 #define MOTION_TYPE_MASK (3*64)
@@ -478,11 +477,6 @@ get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * p
       vl_vlc_dumpbits(&bs->vlc, tab->len);
       macroblock_modes = tab->modes;
 
-      if ((!(picture->frame_pred_frame_dct)) && (picture->picture_structure == FRAME_PICTURE)) {
-         macroblock_modes |= vl_vlc_ubits(&bs->vlc, 1) * DCT_TYPE_INTERLACED;
-         vl_vlc_dumpbits(&bs->vlc, 1);
-      }
-
       return macroblock_modes;
 
    case P_TYPE:
@@ -506,10 +500,6 @@ get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * p
             macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
             vl_vlc_dumpbits(&bs->vlc, 2);
           }
-          if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
-            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 1) * DCT_TYPE_INTERLACED;
-            vl_vlc_dumpbits(&bs->vlc, 1);
-          }
           return macroblock_modes;
       }
 
@@ -524,23 +514,13 @@ get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * p
             macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
             vl_vlc_dumpbits(&bs->vlc, 2);
           }
-          return macroblock_modes;
       } else if (picture->frame_pred_frame_dct) {
-          /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
           macroblock_modes |= MC_FRAME;
-          return macroblock_modes;
-      } else {
-          if (macroblock_modes & MACROBLOCK_INTRA)
-            goto intra;
+      } else if (!(macroblock_modes & MACROBLOCK_INTRA)) {
           macroblock_modes |= vl_vlc_ubits(&bs->vlc, 2) * MOTION_TYPE_BASE;
           vl_vlc_dumpbits(&bs->vlc, 2);
-          if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) {
-          intra:
-            macroblock_modes |= vl_vlc_ubits(&bs->vlc, 1) * DCT_TYPE_INTERLACED;
-            vl_vlc_dumpbits(&bs->vlc, 1);
-          }
-          return macroblock_modes;
       }
+      return macroblock_modes;
 
    case D_TYPE:
 
@@ -552,6 +532,21 @@ get_macroblock_modes(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * p
    }
 }
 
+static inline enum pipe_mpeg12_dct_type
+get_dct_type(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int macroblock_modes)
+{
+   enum pipe_mpeg12_dct_type dct_type = PIPE_MPEG12_DCT_TYPE_FRAME;
+
+   if ((picture->picture_structure == FRAME_PICTURE) &&
+       (!picture->frame_pred_frame_dct) &&
+       (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))) {
+
+      dct_type = vl_vlc_ubits(&bs->vlc, 1) ? PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
+      vl_vlc_dumpbits(&bs->vlc, 1);
+   }
+   return dct_type;
+}
+
 static inline int
 get_quantizer_scale(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture)
 {
@@ -1652,9 +1647,8 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
 
       vl_vlc_needbits(&bs->vlc);
 
-      macroblock_modes = get_macroblock_modes(bs, picture); //macroblock_modes()
-      dct_type = macroblock_modes & DCT_TYPE_INTERLACED ?
-         PIPE_MPEG12_DCT_TYPE_FIELD : PIPE_MPEG12_DCT_TYPE_FRAME;
+      macroblock_modes = get_macroblock_modes(bs, picture);
+      dct_type = get_dct_type(bs, picture, macroblock_modes);
 
       switch(macroblock_modes & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD)) {
       case (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD):
@@ -1702,8 +1696,8 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
          slice_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
          slice_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
          slice_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 1, x, y, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 2, x, y, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, scan, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, scan, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
 
          if (picture->picture_coding_type == D_TYPE) {
             vl_vlc_needbits(&bs->vlc);
@@ -1769,9 +1763,9 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
             if (coded_block_pattern & 0x04)
                slice_non_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+1, quantizer_scale, dct_type); // cc0 luma 3
             if (coded_block_pattern & 0x2)
-               slice_non_intra_DCT(bs, picture, scan, 1, x, y, quantizer_scale, dct_type); // cc1 croma
+               slice_non_intra_DCT(bs, picture, scan, 1, x, y, quantizer_scale, PIPE_MPEG12_DCT_TYPE_FRAME); // cc1 croma
             if (coded_block_pattern & 0x1)
-               slice_non_intra_DCT(bs, picture, scan, 2, x, y, quantizer_scale, dct_type); // cc2 croma
+               slice_non_intra_DCT(bs, picture, scan, 2, x, y, quantizer_scale, PIPE_MPEG12_DCT_TYPE_FRAME); // cc2 croma
          }
 
          dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
-- 
cgit v1.2.3


From 6c731191c08561ea247cb2fcfacdf08f6b842363 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 14 May 2011 17:11:33 +0200
Subject: [g3dvl] fix fi_frame mb increment

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 7723d487d84..a7aa5cdd073 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1534,8 +1534,6 @@ do {							\
 
 #define NEXT_MACROBLOCK		                \
 do {				                \
-   bs->mv_stream[0][x+y*bs->width] = mv_fwd;    \
-   bs->mv_stream[1][x+y*bs->width] = mv_bwd;    \
    ++x;				                \
    if (x == bs->width) {	                \
       ++y;                                      \
@@ -1545,6 +1543,22 @@ do {				                \
    }                                            \
 } while (0)
 
+static inline void
+store_motionvectors(struct vl_mpg12_bs *bs, int x, int y,
+                    struct pipe_motionvector *mv_fwd,
+                    struct pipe_motionvector *mv_bwd)
+{
+   bs->mv_stream[0][x+y*bs->width].top = mv_fwd->top;
+   bs->mv_stream[0][x+y*bs->width].bottom =
+      mv_fwd->top.field_select == PIPE_VIDEO_FRAME ?
+      mv_fwd->top : mv_fwd->bottom;
+
+   bs->mv_stream[1][x+y*bs->width].top = mv_bwd->top;
+   bs->mv_stream[1][x+y*bs->width].bottom =
+      mv_bwd->top.field_select == PIPE_VIDEO_FRAME ?
+      mv_bwd->top : mv_bwd->bottom;
+}
+
 static inline bool
 slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
            int *quantizer_scale, int *x, int *y)
@@ -1771,6 +1785,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
          dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
       }
 
+      store_motionvectors(bs, x, y, &mv_fwd, &mv_bwd);
       NEXT_MACROBLOCK;
 
       vl_vlc_needbits(&bs->vlc);
@@ -1807,7 +1822,9 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
             mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
             mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
          }
+
          do {
+            store_motionvectors(bs, x, y, &mv_fwd, &mv_bwd);
             NEXT_MACROBLOCK;
          } while (--mba_inc);
       }
-- 
cgit v1.2.3


From 56457ffe11fd2b325f2bef3d99ca7e57b44e5c6a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 15 May 2011 13:08:38 +0200
Subject: [g3dvl] move alignment of buffers to mb size into context

---
 src/gallium/auxiliary/vl/vl_context.c        | 8 ++++----
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 3 ---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index be28bb507e6..e4805eae98c 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -195,8 +195,8 @@ vl_context_create_decoder(struct pipe_video_context *context,
    assert(context);
    assert(width > 0 && height > 0);
 
-   buffer_width = ctx->pot_buffers ? util_next_power_of_two(width) : width;
-   buffer_height = ctx->pot_buffers ? util_next_power_of_two(height) : height;
+   buffer_width = ctx->pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
+   buffer_height = ctx->pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
 
    switch (u_reduce_video_profile(profile)) {
       case PIPE_VIDEO_CODEC_MPEG12:
@@ -237,8 +237,8 @@ vl_context_create_buffer(struct pipe_video_context *context,
       return NULL;
    }
 
-   buffer_width = ctx->pot_buffers ? util_next_power_of_two(width) : width;
-   buffer_height = ctx->pot_buffers ? util_next_power_of_two(height) : height;
+   buffer_width = ctx->pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
+   buffer_height = ctx->pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
 
    result = vl_video_buffer_init(context, ctx->pipe,
                                  buffer_width, buffer_height, 1,
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index b7d9b4706a5..f3e19868926 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -877,9 +877,6 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    dec->base.create_buffer = vl_mpeg12_create_buffer;
    dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
 
-   dec->base.width = align(width, MACROBLOCK_WIDTH);
-   dec->base.height = align(height, MACROBLOCK_HEIGHT);
-
    dec->pipe = pipe;
 
    dec->quads = vl_vb_upload_quads(dec->pipe);
-- 
cgit v1.2.3


From 0121aae967d3d1366cccc8946cf89ad22818365e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 15 May 2011 14:07:39 +0200
Subject: [g3dvl] no need for separate intermediate buffers any more

This should reduce the video memory footprint drastically.
---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 204 ++++++++++++++-------------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |   8 +-
 2 files changed, 110 insertions(+), 102 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index f3e19868926..3887cf6f1b4 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -102,9 +102,9 @@ init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
       goto error_sampler;
 
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-      destination = buffer->idct_source->get_surfaces(buffer->idct_source);
+      destination = dec->idct_source->get_surfaces(dec->idct_source);
    else
-      destination = buffer->mc_source->get_surfaces(buffer->mc_source);
+      destination = dec->mc_source->get_surfaces(dec->mc_source);
 
    if (!destination)
       goto error_surface;
@@ -143,8 +143,6 @@ cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer)
 static bool
 init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 {
-   enum pipe_format formats[3];
-
    struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
    struct pipe_surface **idct_surfaces;
 
@@ -156,33 +154,15 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 
    dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
 
-   formats[0] = formats[1] = formats[2] = dec->idct_source_format;
-   buffer->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                              dec->base.width / 4, dec->base.height, 1,
-                                              dec->base.chroma_format,
-                                              formats, PIPE_USAGE_STATIC);
-   if (!buffer->idct_source)
-      goto error_source;
-
-   formats[0] = formats[1] = formats[2] = dec->mc_source_format;
-   buffer->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                            dec->base.width / dec->nr_of_idct_render_targets,
-                                            dec->base.height / 4, dec->nr_of_idct_render_targets,
-                                            dec->base.chroma_format,
-                                            formats, PIPE_USAGE_STATIC);
-
-   if (!buffer->mc_source)
-      goto error_mc_source;
-
-   idct_source_sv = buffer->idct_source->get_sampler_view_planes(buffer->idct_source);
+   idct_source_sv = dec->idct_source->get_sampler_view_planes(dec->idct_source);
    if (!idct_source_sv)
       goto error_source_sv;
 
-   mc_source_sv = buffer->mc_source->get_sampler_view_planes(buffer->mc_source);
+   mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
    if (!mc_source_sv)
       goto error_mc_source_sv;
 
-   idct_surfaces = buffer->mc_source->get_surfaces(buffer->mc_source);
+   idct_surfaces = dec->mc_source->get_surfaces(dec->mc_source);
    if (!idct_surfaces)
       goto error_surfaces;
 
@@ -201,12 +181,6 @@ error_plane:
 error_surfaces:
 error_mc_source_sv:
 error_source_sv:
-   buffer->mc_source->destroy(buffer->mc_source);
-
-error_mc_source:
-   buffer->idct_source->destroy(buffer->idct_source);
-
-error_source:
    return false;
 }
 
@@ -222,31 +196,18 @@ cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
    vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
    vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
-   buf->idct_source->destroy(buf->idct_source);
 }
 
 static bool
 init_mc_buffer(struct vl_mpeg12_buffer *buf)
 {
    struct vl_mpeg12_decoder *dec;
-   enum pipe_format formats[3];
 
    assert(buf);
 
    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
    assert(dec);
 
-   if (dec->base.entrypoint > PIPE_VIDEO_ENTRYPOINT_IDCT) {
-      formats[0] = formats[1] = formats[2] =dec->mc_source_format;
-      buf->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                               dec->base.width, dec->base.height, 1,
-                                               dec->base.chroma_format,
-                                               formats, PIPE_USAGE_STATIC);
-
-      if (!buf->mc_source)
-         goto error_mc_source;
-   }
-
    if(!vl_mc_init_buffer(&dec->mc_y, &buf->mc[0]))
       goto error_mc_y;
 
@@ -265,9 +226,6 @@ error_mc_cb:
    vl_mc_cleanup_buffer(&buf->mc[0]);
 
 error_mc_y:
-   buf->mc_source->destroy(buf->mc_source);
-
-error_mc_source:
    return false;
 }
 
@@ -280,8 +238,6 @@ cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
 
    for (i = 0; i < VL_MAX_PLANES; ++i)
       vl_mc_cleanup_buffer(&buf->mc[i]);
-
-   buf->mc_source->destroy(buf->mc_source);
 }
 
 static void
@@ -462,10 +418,12 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 
    vl_mc_cleanup(&dec->mc_y);
    vl_mc_cleanup(&dec->mc_c);
+   dec->mc_source->destroy(dec->mc_source);
 
    if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
       vl_idct_cleanup(&dec->idct_y);
       vl_idct_cleanup(&dec->idct_c);
+      dec->idct_source->destroy(dec->idct_source);
    }
 
    vl_zscan_cleanup(&dec->zscan_y);
@@ -602,7 +560,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
          vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_ycbcr_blocks[i]);
    }
 
-   mc_source_sv = buf->mc_source->get_sampler_view_planes(buf->mc_source);
+   mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
    for (i = 0, component = 0; i < VL_MAX_PLANES; ++i) {
       if (!surfaces[i]) continue;
 
@@ -726,29 +684,26 @@ init_zscan(struct vl_mpeg12_decoder *dec)
 }
 
 static bool
-init_idct(struct vl_mpeg12_decoder *dec)
+init_idct(struct vl_mpeg12_decoder *dec, float *mc_scale)
 {
+   unsigned nr_of_idct_render_targets;
+   enum pipe_format formats[3];
+
    struct pipe_sampler_view *matrix, *transpose = NULL;
    float matrix_scale, transpose_scale;
 
-   dec->nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS);
+   nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS);
 
    // more than 4 render targets usually doesn't makes any seens
-   dec->nr_of_idct_render_targets = MIN2(dec->nr_of_idct_render_targets, 4);
+   nr_of_idct_render_targets = MIN2(nr_of_idct_render_targets, 4);
 
-   dec->idct_source_format = find_first_supported_format(dec, const_idct_source_formats,
-                                                         num_idct_source_formats, PIPE_TEXTURE_2D);
-
-   if (dec->idct_source_format == PIPE_FORMAT_NONE)
-      return false;
+   formats[0] = formats[1] = formats[2] = find_first_supported_format(dec, const_idct_source_formats,
+                                                                      num_idct_source_formats, PIPE_TEXTURE_2D);
 
-   dec->mc_source_format = find_first_supported_format(dec, const_idct_intermediate_formats,
-                                                       num_idct_intermediate_formats, PIPE_TEXTURE_3D);
+   switch (formats[0]) {
+   case PIPE_FORMAT_NONE:
+      goto error_idct_format;
 
-   if (dec->mc_source_format == PIPE_FORMAT_NONE)
-      return false;
-
-   switch (dec->idct_source_format) {
    case PIPE_FORMAT_R16G16B16A16_SSCALED:
       matrix_scale = SCALE_FACTOR_SSCALED;
       break;
@@ -762,14 +717,46 @@ init_idct(struct vl_mpeg12_decoder *dec)
       return false;
    }
 
-   if (dec->mc_source_format == PIPE_FORMAT_R16G16B16A16_FLOAT ||
-       dec->mc_source_format == PIPE_FORMAT_R32G32B32A32_FLOAT)
+   dec->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                           dec->base.width / 4, dec->base.height, 1,
+                                           dec->base.chroma_format,
+                                           formats, PIPE_USAGE_STATIC);
+   if (!dec->idct_source)
+      goto error_idct_source;
+
+   formats[0] = formats[1] = formats[2] = find_first_supported_format(dec, const_idct_intermediate_formats,
+                                                                      num_idct_intermediate_formats, PIPE_TEXTURE_3D);
+
+   switch (formats[0]) {
+   case PIPE_FORMAT_NONE:
+      goto error_mc_format;
+
+   case PIPE_FORMAT_R16G16B16A16_FLOAT:
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
       transpose_scale = 1.0f;
-   else
-      transpose_scale = matrix_scale = sqrt(matrix_scale);
+      *mc_scale = 1.0f;
+      break;
 
-   if (dec->mc_source_format == PIPE_FORMAT_R16_SSCALED)
+   case PIPE_FORMAT_R16_SSCALED:
+      transpose_scale = matrix_scale = sqrt(matrix_scale);
       transpose_scale /= SCALE_FACTOR_SSCALED;
+      *mc_scale = SCALE_FACTOR_SSCALED;
+      break;
+
+   default:
+      transpose_scale = matrix_scale = sqrt(matrix_scale);
+      *mc_scale = 1.0f;
+      break;
+   }
+
+   dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                         dec->base.width / nr_of_idct_render_targets,
+                                         dec->base.height / 4, nr_of_idct_render_targets,
+                                         dec->base.chroma_format,
+                                         formats, PIPE_USAGE_STATIC);
+
+   if (!dec->mc_source)
+      goto error_mc_source;
 
    if (!(matrix = vl_idct_upload_matrix(dec->pipe, matrix_scale)))
       goto error_matrix;
@@ -781,15 +768,16 @@ init_idct(struct vl_mpeg12_decoder *dec)
       pipe_sampler_view_reference(&transpose, matrix);
 
    if (!vl_idct_init(&dec->idct_y, dec->pipe, dec->base.width, dec->base.height,
-                     dec->nr_of_idct_render_targets, matrix, transpose))
+                     nr_of_idct_render_targets, matrix, transpose))
       goto error_y;
 
    if(!vl_idct_init(&dec->idct_c, dec->pipe, dec->chroma_width, dec->chroma_height,
-                    dec->nr_of_idct_render_targets, matrix, transpose))
+                    nr_of_idct_render_targets, matrix, transpose))
       goto error_c;
 
    pipe_sampler_view_reference(&matrix, NULL);
    pipe_sampler_view_reference(&transpose, NULL);
+
    return true;
 
 error_c:
@@ -802,9 +790,50 @@ error_transpose:
    pipe_sampler_view_reference(&matrix, NULL);
 
 error_matrix:
+   dec->mc_source->destroy(dec->mc_source);
+
+error_mc_source:
+error_mc_format:
+   dec->idct_source->destroy(dec->idct_source);
+
+error_idct_source:
+error_idct_format:
    return false;
 }
 
+static bool
+init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, float *mc_scale)
+{
+   enum pipe_format formats[3];
+
+   formats[0] = formats[1] = formats[2] = find_first_supported_format(dec, const_mc_source_formats,
+                                                                      num_mc_source_formats, PIPE_TEXTURE_2D);
+
+   switch (formats[0]) {
+   case PIPE_FORMAT_NONE:
+      return false;
+
+   case PIPE_FORMAT_R16_SNORM:
+      *mc_scale = SCALE_FACTOR_SNORM;
+      break;
+
+   case PIPE_FORMAT_R16_SSCALED:
+      *mc_scale = SCALE_FACTOR_SSCALED;
+      break;
+
+   default:
+      assert(0);
+      return false;
+   }
+
+   dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
+                                         dec->base.width, dec->base.height, 1,
+                                         dec->base.chroma_format,
+                                         formats, PIPE_USAGE_STATIC);
+
+   return dec->mc_source;
+}
+
 static void
 mc_vert_shader_callback(void *priv, struct vl_mc *mc,
                         struct ureg_program *shader,
@@ -907,32 +936,11 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
       goto error_zscan;
 
    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
-      if (!init_idct(dec))
-         goto error_idct;
-      if (dec->mc_source_format == PIPE_FORMAT_R16_SSCALED)
-         mc_scale = SCALE_FACTOR_SSCALED;
-      else
-         mc_scale = 1.0f;
+      if (!init_idct(dec, &mc_scale))
+         goto error_sources;
    } else {
-      dec->mc_source_format = find_first_supported_format(dec, const_mc_source_formats,
-                                                          num_mc_source_formats, PIPE_TEXTURE_3D);
-
-      if (dec->mc_source_format == PIPE_FORMAT_NONE)
-         return NULL;
-
-      switch (dec->mc_source_format) {
-      case PIPE_FORMAT_R16_SNORM:
-         mc_scale = SCALE_FACTOR_SNORM;
-         break;
-
-      case PIPE_FORMAT_R16_SSCALED:
-         mc_scale = SCALE_FACTOR_SSCALED;
-         break;
-
-      default:
-         assert(0);
-         return NULL;
-      }
+      if (!init_mc_source_widthout_idct(dec, &mc_scale))
+         goto error_sources;
    }
 
    if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, mc_scale,
@@ -959,9 +967,11 @@ error_mc_y:
    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
       vl_idct_cleanup(&dec->idct_y);
       vl_idct_cleanup(&dec->idct_c);
+      dec->idct_source->destroy(dec->idct_source);
    }
+   dec->mc_source->destroy(dec->mc_source);
 
-error_idct:
+error_sources:
    vl_zscan_cleanup(&dec->zscan_y);
    vl_zscan_cleanup(&dec->zscan_c);
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 0ea71d11ca7..4fc19ff9524 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -50,11 +50,8 @@ struct vl_mpeg12_decoder
 
    unsigned blocks_per_line;
    unsigned max_blocks;
-   unsigned nr_of_idct_render_targets;
 
    enum pipe_format zscan_source_format;
-   enum pipe_format idct_source_format;
-   enum pipe_format mc_source_format;
 
    struct pipe_vertex_buffer quads;
    struct pipe_vertex_buffer pos;
@@ -68,6 +65,9 @@ struct vl_mpeg12_decoder
    struct pipe_sampler_view *zscan_normal;
    struct pipe_sampler_view *zscan_alternate;
 
+   struct pipe_video_buffer *idct_source;
+   struct pipe_video_buffer *mc_source;
+
    struct vl_zscan zscan_y, zscan_c;
    struct vl_idct idct_y, idct_c;
    struct vl_mc mc_y, mc_c;
@@ -82,8 +82,6 @@ struct vl_mpeg12_buffer
    struct vl_vertex_buffer vertex_stream;
 
    struct pipe_video_buffer *zscan_source;
-   struct pipe_video_buffer *idct_source;
-   struct pipe_video_buffer *mc_source;
 
    struct vl_mpg12_bs bs;
    struct vl_zscan_buffer zscan[VL_MAX_PLANES];
-- 
cgit v1.2.3


From 5d7c8130b80ffc27fc73a2ca1a55352d4b4e34c4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 15 May 2011 14:53:13 +0200
Subject: [g3dvl] pre apply zscan to quant matrix

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 119 +++++++++++--------------
 1 file changed, 51 insertions(+), 68 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index a7aa5cdd073..8854988194f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -721,11 +721,9 @@ get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
 }
 
 static inline void
-get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                    const int scan[64], int quantizer_scale, short *dest)
+get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
-   int i, j, val;
-   uint8_t *quant_matrix = picture->intra_quantizer_matrix;
+   int i, val;
    int mismatch;
    const DCTtab *tab;
 
@@ -744,11 +742,9 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             break;	/* end of block */
 
       normal_code:
-         j = scan[i];
-
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+         val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
 
          /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
@@ -776,11 +772,9 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          if (i >= 64)
             break;	/* illegal, check needed to avoid buffer overflow */
 
-         j = scan[i];
-
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
-         val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[j]) / 16;
+         val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16;
 
          SATURATE (val);
          dest[i] = val;
@@ -822,11 +816,9 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 }
 
 static inline void
-get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                    const int scan[64], int quantizer_scale, short *dest)
+get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
-   int i, j, val;
-   uint8_t *quant_matrix = picture->intra_quantizer_matrix;
+   int i, val;
    int mismatch;
    const DCTtab * tab;
 
@@ -844,10 +836,9 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          if (i < 64) {
 
          normal_code:
-            j = scan[i];
             bs->vlc.buf <<= tab->len;
             bs->vlc.bits += tab->len + 1;
-            val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+            val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
 
             /* if (bitstream_get (1)) val = -val; */
             val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
@@ -874,11 +865,9 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             if (i >= 64)
                 break;	/* illegal, check against buffer overflow */
 
-            j = scan[i];
-
             vl_vlc_dumpbits(&bs->vlc, 12);
             vl_vlc_needbits(&bs->vlc);
-            val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[j]) / 16;
+            val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16;
 
             SATURATE (val);
             dest[i] = val;
@@ -921,11 +910,9 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 }
 
 static inline void
-get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                    const int scan[64], int quantizer_scale, short *dest)
+get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
-   int i, j, val;
-   uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
+   int i, val;
    int mismatch;
    const DCTtab *tab;
 
@@ -950,10 +937,9 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
             break;	/* end of block */
 
       normal_code:
-         j = scan[i];
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
+         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5;
 
          /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
@@ -984,12 +970,10 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
          if (i >= 64)
             break;	/* illegal, check needed to avoid buffer overflow */
 
-         j = scan[i];
-
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
          val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1;
-         val = (val * quantizer_scale * quant_matrix[j]) / 32;
+         val = (val * quantizer_scale * quant_matrix[i]) / 32;
 
          SATURATE (val);
          dest[i] = val;
@@ -1030,11 +1014,9 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 }
 
 static inline void
-get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                      const int scan[64], int quantizer_scale, short *dest)
+get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
-   int i, j, val;
-   uint8_t *quant_matrix = picture->intra_quantizer_matrix;
+   int i, val;
    const DCTtab * tab;
 
    i = 0;
@@ -1051,10 +1033,9 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
             break;	/* end of block */
 
       normal_code:
-         j = scan[i];
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;
+         val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
 
          /* oddification */
          val = (val - 1) | 1;
@@ -1084,8 +1065,6 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
          if (i >= 64)
             break;	/* illegal, check needed to avoid buffer overflow */
 
-         j = scan[i];
-
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
          val = vl_vlc_sbits(&bs->vlc, 8);
@@ -1093,7 +1072,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
             vl_vlc_dumpbits(&bs->vlc, 8);
             val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
          }
-         val = (val * quantizer_scale * quant_matrix[j]) / 16;
+         val = (val * quantizer_scale * quant_matrix[i]) / 16;
 
          /* oddification */
          val = (val + ~SBITS (val, 1)) | 1;
@@ -1135,11 +1114,9 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *
 }
 
 static inline void
-get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-                          const int scan[64], int quantizer_scale, short *dest)
+get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
-   int i, j, val;
-   uint8_t *quant_matrix = picture->non_intra_quantizer_matrix;
+   int i, val;
    const DCTtab * tab;
 
    i = -1;
@@ -1162,10 +1139,9 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
             break;	/* end of block */
 
       normal_code:
-         j = scan[i];
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;
+         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5;
 
          /* oddification */
          val = (val - 1) | 1;
@@ -1198,8 +1174,6 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
          if (i >= 64)
             break;	/* illegal, check needed to avoid buffer overflow */
 
-         j = scan[i];
-
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
          val = vl_vlc_sbits(&bs->vlc, 8);
@@ -1208,7 +1182,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
             val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
          }
          val = 2 * (val + SBITS (val, 1)) + 1;
-         val = (val * quantizer_scale * quant_matrix[j]) / 32;
+         val = (val * quantizer_scale * quant_matrix[i]) / 32;
 
          /* oddification */
          val = (val + ~SBITS (val, 1)) | 1;
@@ -1250,7 +1224,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des
 }
 
 static inline void
-slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int scan[64], int cc,
+slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc,
                  unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
 {
    short dest[64];
@@ -1272,11 +1246,11 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
    dest[0] = dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
    if (picture->mpeg1) {
       if (picture->picture_coding_type != D_TYPE)
-          get_mpeg1_intra_block(bs, picture, scan, quantizer_scale, dest);
+          get_mpeg1_intra_block(bs, quant_matrix, quantizer_scale, dest);
    } else if (picture->intra_vlc_format)
-      get_intra_block_B15(bs, picture, scan, quantizer_scale, dest);
+      get_intra_block_B15(bs, quant_matrix, quantizer_scale, dest);
    else
-      get_intra_block_B14(bs, picture, scan, quantizer_scale, dest);
+      get_intra_block_B14(bs, quant_matrix, quantizer_scale, dest);
 
    memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
 
@@ -1286,8 +1260,8 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
 }
 
 static inline void
-slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int scan[64], int cc,
-                    unsigned x, unsigned y, int quantizer_scale, enum pipe_mpeg12_dct_type coding)
+slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc,
+                    unsigned x, unsigned y,  enum pipe_mpeg12_dct_type coding, int quantizer_scale)
 {
    short dest[64];
 
@@ -1298,9 +1272,9 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
    memset(dest, 0, sizeof(int16_t) * 64);
    if (picture->mpeg1)
-      get_mpeg1_non_intra_block(bs, picture, scan, quantizer_scale, dest);
+      get_mpeg1_non_intra_block(bs, quant_matrix, quantizer_scale, dest);
    else
-      get_non_intra_block(bs, picture, scan, quantizer_scale, dest);
+      get_non_intra_block(bs, quant_matrix, quantizer_scale, dest);
 
    memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
 
@@ -1619,7 +1593,8 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
 }
 
 static inline bool
-decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, const int scan[64])
+decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
+             const int intra_quantizer_matrix[64], const int non_intra_quantizer_matrix[64])
 {
    enum pipe_video_field_select default_field_select;
    struct pipe_motionvector mv_fwd, mv_bwd;
@@ -1706,12 +1681,12 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
          mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
 
          // unravaled loop of 6 block(i) calls in macroblock()
-         slice_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, scan, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
 
          if (picture->picture_coding_type == D_TYPE) {
             vl_vlc_needbits(&bs->vlc);
@@ -1769,17 +1744,17 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c
 
             // TODO  optimize not fully used for idct accel only mc.
             if (coded_block_pattern & 0x20)
-               slice_non_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+0, quantizer_scale, dct_type); // cc0  luma 0
+               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0  luma 0
             if (coded_block_pattern & 0x10)
-               slice_non_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+0, quantizer_scale, dct_type); // cc0 luma 1
+               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
             if (coded_block_pattern & 0x08)
-               slice_non_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+1, quantizer_scale, dct_type); // cc0 luma 2
+               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
             if (coded_block_pattern & 0x04)
-               slice_non_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+1, quantizer_scale, dct_type); // cc0 luma 3
+               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
             if (coded_block_pattern & 0x2)
-               slice_non_intra_DCT(bs, picture, scan, 1, x, y, quantizer_scale, PIPE_MPEG12_DCT_TYPE_FRAME); // cc1 croma
+               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma
             if (coded_block_pattern & 0x1)
-               slice_non_intra_DCT(bs, picture, scan, 2, x, y, quantizer_scale, PIPE_MPEG12_DCT_TYPE_FRAME); // cc2 croma
+               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma
          }
 
          dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
@@ -1881,7 +1856,11 @@ void
 vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
                    struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3])
 {
+   int intra_quantizer_matrix[64];
+   int non_intra_quantizer_matrix[64];
+
    const int *scan;
+   unsigned i;
 
    assert(bs);
    assert(num_ycbcr_blocks);
@@ -1892,6 +1871,10 @@ vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffe
    vl_vlc_init(&bs->vlc, buffer, num_bytes);
 
    scan = picture->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
+   for (i = 0; i < 64; ++i) {
+      intra_quantizer_matrix[i] = picture->intra_quantizer_matrix[scan[i]];
+      non_intra_quantizer_matrix[i] = picture->non_intra_quantizer_matrix[scan[i]];
+   }
 
-   while(decode_slice(bs, picture, scan));
+   while(decode_slice(bs, picture, intra_quantizer_matrix, non_intra_quantizer_matrix));
 }
-- 
cgit v1.2.3


From 235de23e57bd6dac6a2fcdd0807838eef72f6173 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 15 May 2011 15:27:38 +0200
Subject: [g3dvl] store mvpos separately from x,y coord

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 50 ++++++++++++++------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 8854988194f..508bb9fab19 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1506,36 +1506,27 @@ do {							\
       routine(bs, picture->f_code[1], &mv_bwd);         \
 } while (0)
 
-#define NEXT_MACROBLOCK		                \
-do {				                \
-   ++x;				                \
-   if (x == bs->width) {	                \
-      ++y;                                      \
-      if (y >= bs->height)                      \
-         return false;                          \
-      x = 0;                                    \
-   }                                            \
-} while (0)
-
 static inline void
-store_motionvectors(struct vl_mpg12_bs *bs, int x, int y,
+store_motionvectors(struct vl_mpg12_bs *bs, unsigned *mv_pos,
                     struct pipe_motionvector *mv_fwd,
                     struct pipe_motionvector *mv_bwd)
 {
-   bs->mv_stream[0][x+y*bs->width].top = mv_fwd->top;
-   bs->mv_stream[0][x+y*bs->width].bottom =
+   bs->mv_stream[0][*mv_pos].top = mv_fwd->top;
+   bs->mv_stream[0][*mv_pos].bottom =
       mv_fwd->top.field_select == PIPE_VIDEO_FRAME ?
       mv_fwd->top : mv_fwd->bottom;
 
-   bs->mv_stream[1][x+y*bs->width].top = mv_bwd->top;
-   bs->mv_stream[1][x+y*bs->width].bottom =
+   bs->mv_stream[1][*mv_pos].top = mv_bwd->top;
+   bs->mv_stream[1][*mv_pos].bottom =
       mv_bwd->top.field_select == PIPE_VIDEO_FRAME ?
       mv_bwd->top : mv_bwd->bottom;
+
+   (*mv_pos)++;
 }
 
 static inline bool
 slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
-           int *quantizer_scale, int *x, int *y)
+           int *quantizer_scale, unsigned *x, unsigned *y, unsigned *mv_pos)
 {
    const MBAtab * mba;
 
@@ -1589,6 +1580,8 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
    if (*y > bs->height)
       return false;
 
+   *mv_pos = *x + *y * bs->width;
+
    return true;
 }
 
@@ -1604,7 +1597,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
    int dc_dct_pred[3] = { 0, 0, 0 };
    int quantizer_scale;
 
-   int x, y;
+   unsigned x, y, mv_pos;
 
    switch(picture->picture_structure) {
    case TOP_FIELD:
@@ -1620,7 +1613,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
       break;
    }
 
-   if (!slice_init(bs, picture, &quantizer_scale, &x, &y))
+   if (!slice_init(bs, picture, &quantizer_scale, &x, &y, &mv_pos))
       return false;
 
    mv_fwd.top.x = mv_fwd.top.y = mv_fwd.bottom.x = mv_fwd.bottom.y = 0;
@@ -1760,8 +1753,13 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
          dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
       }
 
-      store_motionvectors(bs, x, y, &mv_fwd, &mv_bwd);
-      NEXT_MACROBLOCK;
+      store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd);
+      if (++x >= bs->width) {
+         ++y;
+         if (y >= bs->height)
+            return false;
+         x -= bs->width;
+      }
 
       vl_vlc_needbits(&bs->vlc);
       mba_inc = 0;
@@ -1798,11 +1796,17 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
             mv_fwd.top.weight = mv_fwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MAX;
          }
 
+         x += mba_inc;
          do {
-            store_motionvectors(bs, x, y, &mv_fwd, &mv_bwd);
-            NEXT_MACROBLOCK;
+            store_motionvectors(bs, &mv_pos, &mv_fwd, &mv_bwd);
          } while (--mba_inc);
       }
+      while (x >= bs->width) {
+         ++y;
+         if (y >= bs->height)
+            return false;
+         x -= bs->width;
+      }
    }
 }
 
-- 
cgit v1.2.3


From 20aabb9c2eff63fd97571b9f3db453fe3accc10a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 15 May 2011 19:05:30 +0200
Subject: [g3dvl] enable gl_rasterization_rules for zscan

---
 src/gallium/auxiliary/vl/vl_zscan.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
index d06c567bfb4..ec806982226 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -98,7 +98,7 @@ create_vert_shader(struct vl_zscan *zscan)
    struct ureg_dst tmp;
    struct ureg_dst o_vpos, o_vtex[zscan->num_channels];
 
-   unsigned i;
+   signed i;
 
    shader = ureg_create(TGSI_PROCESSOR_VERTEX);
    if (!shader)
@@ -139,13 +139,12 @@ create_vert_shader(struct vl_zscan *zscan)
    ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), instance,
             ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
 
-   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp));
+   ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
    ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp));
 
    for (i = 0; i < zscan->num_channels; ++i) {
-      if (i > 0)
-         ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(tmp),
-                  ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH)));
+      ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
+               ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * BLOCK_WIDTH) * (i - (signed)zscan->num_channels / 2)));
 
       ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
                ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
@@ -256,7 +255,7 @@ init_state(struct vl_zscan *zscan)
    assert(zscan);
 
    memset(&rs_state, 0, sizeof(rs_state));
-   rs_state.gl_rasterization_rules = false;
+   rs_state.gl_rasterization_rules = true;
    zscan->rs_state = zscan->pipe->create_rasterizer_state(zscan->pipe, &rs_state);
    if (!zscan->rs_state)
       goto error_rs_state;
-- 
cgit v1.2.3


From 3db6514357a7c634045ae7bc7bba7d7dbf9d58c5 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 15 May 2011 19:21:21 +0200
Subject: [g3dvl] enable gl_rasterization_rules for idct

---
 src/gallium/auxiliary/vl/vl_idct.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index ebb4ad4fe0c..751aacac84d 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -207,7 +207,7 @@ create_stage1_frag_shader(struct vl_idct *idct)
    struct ureg_dst l[4][2], r[2];
    struct ureg_dst fragment[idct->nr_of_render_targets];
 
-   unsigned i, j;
+   int i, j;
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
@@ -230,24 +230,19 @@ create_stage1_frag_shader(struct vl_idct *idct)
    r[0] = ureg_DECL_temporary(shader);
    r[1] = ureg_DECL_temporary(shader);
 
-   for (i = 1; i < 4; ++i) {
-      increment_addr(shader, l[i], l_addr, false, false, i, idct->buffer_height);
+   for (i = 0; i < 4; ++i) {
+      increment_addr(shader, l[i], l_addr, false, false, i - 2, idct->buffer_height);
    }
 
    for (i = 0; i < 4; ++i) {
-      struct ureg_src s_addr[2];
-      s_addr[0] = i == 0 ? l_addr[0] : ureg_src(l[i][0]);
-      s_addr[1] = i == 0 ? l_addr[1] : ureg_src(l[i][1]);
+      struct ureg_src s_addr[2] = { ureg_src(l[i][0]), ureg_src(l[i][1]) };
       fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1));
    }
 
    for (i = 0; i < idct->nr_of_render_targets; ++i) {
-      if(i > 0)
-         increment_addr(shader, r, r_addr, true, true, i, BLOCK_HEIGHT);
+      increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, BLOCK_HEIGHT);
 
       struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
-      s_addr[0] = i == 0 ? r_addr[0] : ureg_src(r[0]);
-      s_addr[1] = i == 0 ? r_addr[1] : ureg_src(r[1]);
       fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0));
 
       for (j = 0; j < 4; ++j) {
@@ -372,7 +367,7 @@ init_state(struct vl_idct *idct)
    assert(idct);
 
    memset(&rs_state, 0, sizeof(rs_state));
-   rs_state.gl_rasterization_rules = false;
+   rs_state.gl_rasterization_rules = true;
    idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
    if (!idct->rs_state)
       goto error_rs_state;
-- 
cgit v1.2.3


From 3e976ef31a27ca9a23372f4364955f0f0a5c4ef4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 16 May 2011 00:26:44 +0200
Subject: [g3dvl] store the idct matrix as hex values

Give the dct matrix a higher precision.
---
 src/gallium/auxiliary/vl/vl_idct.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 751aacac84d..11f935afd73 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -49,15 +49,22 @@ enum VS_OUTPUT
    VS_O_R_ADDR1
 };
 
-static const float const_matrix[8][8] = {
-   {  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.3535530f,  0.353553f,  0.3535530f },
-   {  0.4903930f,  0.4157350f,  0.2777850f,  0.0975451f, -0.0975452f, -0.2777850f, -0.415735f, -0.4903930f },
-   {  0.4619400f,  0.1913420f, -0.1913420f, -0.4619400f, -0.4619400f, -0.1913420f,  0.191342f,  0.4619400f },
-   {  0.4157350f, -0.0975452f, -0.4903930f, -0.2777850f,  0.2777850f,  0.4903930f,  0.097545f, -0.4157350f },
-   {  0.3535530f, -0.3535530f, -0.3535530f,  0.3535540f,  0.3535530f, -0.3535540f, -0.353553f,  0.3535530f },
-   {  0.2777850f, -0.4903930f,  0.0975452f,  0.4157350f, -0.4157350f, -0.0975451f,  0.490393f, -0.2777850f },
-   {  0.1913420f, -0.4619400f,  0.4619400f, -0.1913420f, -0.1913410f,  0.4619400f, -0.461940f,  0.1913420f },
-   {  0.0975451f, -0.2777850f,  0.4157350f, -0.4903930f,  0.4903930f, -0.4157350f,  0.277786f, -0.0975458f }
+/**
+ * The DCT matrix stored as hex representation of floats. Equal to the following equation:
+ * for (i = 0; i < 8; ++i)
+ *    for (j = 0; j < 8; ++j)
+ *       if (i == 0) const_matrix[i][j] = 1.0f / sqrtf(8.0f);
+ *       else const_matrix[i][j] = sqrtf(2.0f / 8.0f) * cosf((2 * j + 1) * i * M_PI / (2.0f * 8.0f));
+ */
+static const uint32_t const_matrix[8][8] = {
+   { 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3, 0x3eb504f3 },
+   { 0x3efb14be, 0x3ed4db31, 0x3e8e39da, 0x3dc7c5c4, 0xbdc7c5c2, 0xbe8e39d9, 0xbed4db32, 0xbefb14bf },
+   { 0x3eec835f, 0x3e43ef15, 0xbe43ef14, 0xbeec835e, 0xbeec835f, 0xbe43ef1a, 0x3e43ef1b, 0x3eec835f },
+   { 0x3ed4db31, 0xbdc7c5c2, 0xbefb14bf, 0xbe8e39dd, 0x3e8e39d7, 0x3efb14bf, 0x3dc7c5d0, 0xbed4db34 },
+   { 0x3eb504f3, 0xbeb504f3, 0xbeb504f4, 0x3eb504f1, 0x3eb504f3, 0xbeb504f0, 0xbeb504ef, 0x3eb504f4 },
+   { 0x3e8e39da, 0xbefb14bf, 0x3dc7c5c8, 0x3ed4db32, 0xbed4db34, 0xbdc7c5bb, 0x3efb14bf, 0xbe8e39d7 },
+   { 0x3e43ef15, 0xbeec835f, 0x3eec835f, 0xbe43ef07, 0xbe43ef23, 0x3eec8361, 0xbeec835c, 0x3e43ef25 },
+   { 0x3dc7c5c4, 0xbe8e39dd, 0x3ed4db32, 0xbefb14c0, 0x3efb14be, 0xbed4db31, 0x3e8e39ce, 0xbdc7c596 },
 };
 
 static void
@@ -540,7 +547,7 @@ vl_idct_upload_matrix(struct pipe_context *pipe, float scale)
    for(i = 0; i < BLOCK_HEIGHT; ++i)
       for(j = 0; j < BLOCK_WIDTH; ++j)
          // transpose and scale
-         f[i * pitch + j] = const_matrix[j][i] * scale;
+         f[i * pitch + j] = ((const float (*)[8])const_matrix)[j][i] * scale;
 
    pipe->transfer_unmap(pipe, buf_transfer);
    pipe->transfer_destroy(pipe, buf_transfer);
-- 
cgit v1.2.3


From 07114b3fdc4e54965731de78ac932f6a1b6deeb7 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 16 May 2011 23:04:47 +0200
Subject: [g3dvl] softpipe now needs the llvm libs

---
 src/gallium/targets/xvmc-softpipe/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/targets/xvmc-softpipe/Makefile b/src/gallium/targets/xvmc-softpipe/Makefile
index 5b60bede589..ffc042900a7 100644
--- a/src/gallium/targets/xvmc-softpipe/Makefile
+++ b/src/gallium/targets/xvmc-softpipe/Makefile
@@ -14,7 +14,7 @@ PIPE_DRIVERS = \
 C_SOURCES = \
 	$(TOP)/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
 
-DRIVER_LIBS =
+DRIVER_LIBS = $(LLVM_LIBS)
 
 include ../Makefile.xvmc
 
-- 
cgit v1.2.3


From 9937e85bccbf2f6bd77d061ab0488d45e9366f10 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 16 May 2011 23:05:38 +0200
Subject: [g3dvl] add missing ureg_END

---
 src/gallium/auxiliary/vl/vl_mc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index 7474c58250d..3b665fafb7d 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -363,6 +363,8 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale, vl_mc_ycbcr_frag_shader f
 
    ureg_release_temporary(shader, tmp);
 
+   ureg_END(shader);
+
    return ureg_create_shader_and_destroy(shader, r->pipe);
 }
 
-- 
cgit v1.2.3


From 9f64199d39cb42f0266e52451fa1c4d3440bc9e2 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 18 May 2011 00:10:59 +0200
Subject: [g3dvl] fix missing swizzle on instanceid

---
 src/gallium/auxiliary/vl/vl_zscan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
index ec806982226..0f468dfb5ab 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -136,7 +136,7 @@ create_vert_shader(struct vl_zscan *zscan)
    ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
 
-   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), instance,
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), ureg_scalar(instance, TGSI_SWIZZLE_X),
             ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
 
    ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
-- 
cgit v1.2.3


From 0ef773ff798b2dedf1b7e53d4b5d834e1c68de38 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 18 May 2011 21:29:39 +0200
Subject: vdpau: rename vdpau driver to r600/softpipe

---
 src/gallium/targets/Makefile.vdpau          | 1 -
 src/gallium/targets/vdpau-r600/Makefile     | 2 ++
 src/gallium/targets/vdpau-softpipe/Makefile | 2 ++
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/targets/Makefile.vdpau b/src/gallium/targets/Makefile.vdpau
index 6aa01037afa..6b8443fe35c 100644
--- a/src/gallium/targets/Makefile.vdpau
+++ b/src/gallium/targets/Makefile.vdpau
@@ -1,6 +1,5 @@
 # This makefile template is used to build libvdpau_g3dvl.so
 
-LIBBASENAME = vdpau_g3dvl
 LIBNAME = lib$(LIBBASENAME).so
 VDPAU_LIB_GLOB=lib$(LIBBASENAME).*so*
 VDPAU_MAJOR = 1
diff --git a/src/gallium/targets/vdpau-r600/Makefile b/src/gallium/targets/vdpau-r600/Makefile
index efcaaaa4e18..07d40725767 100644
--- a/src/gallium/targets/vdpau-r600/Makefile
+++ b/src/gallium/targets/vdpau-r600/Makefile
@@ -1,6 +1,8 @@
 TOP = ../../../..
 include $(TOP)/configs/current
 
+LIBBASENAME = vdpau_r600
+
 PIPE_DRIVERS = \
         $(TOP)/src/gallium/drivers/r600/libr600.a \
 	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
diff --git a/src/gallium/targets/vdpau-softpipe/Makefile b/src/gallium/targets/vdpau-softpipe/Makefile
index 29dea50e7fb..139b01b982a 100644
--- a/src/gallium/targets/vdpau-softpipe/Makefile
+++ b/src/gallium/targets/vdpau-softpipe/Makefile
@@ -1,6 +1,8 @@
 TOP = ../../../..
 include $(TOP)/configs/current
 
+LIBBASENAME = vdpau_softpipe
+
 DRIVER_DEFINES = -DGALLIUM_SOFTPIPE
 DRIVER_INCLUDES =
 
-- 
cgit v1.2.3


From 120b55a96e30f1f74ba3448665cef3d724fed647 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 21 May 2011 15:37:29 +0200
Subject: r600g: fix "Fixed-Point Data Conversions"

According to OpenGL 3.1 chapter 2.1.5 the representation without zero
should only be used for vertex attribute values, but not for textures
or frame-buffers.
---
 src/gallium/drivers/r600/evergreen_state.c | 2 +-
 src/gallium/drivers/r600/r600_state.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 654b04ea1bd..187f00e1e52 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -424,7 +424,7 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
 				(tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
 	r600_pipe_state_add_reg(rstate, R_030010_RESOURCE0_WORD4,
 				word4 |
-				S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_NO_ZERO) |
+				S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
 				S_030010_ENDIAN_SWAP(endian) |
 				S_030010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_030014_RESOURCE0_WORD5,
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 8098e489d0f..0a1fa723e13 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -490,7 +490,7 @@ static struct pipe_sampler_view *r600_create_sampler_view(struct pipe_context *c
 				(tmp->offset[1] + r600_bo_offset(bo[1])) >> 8, 0xFFFFFFFF, bo[1]);
 	r600_pipe_state_add_reg(rstate, R_038010_RESOURCE0_WORD4,
 				word4 |
-				S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_NO_ZERO) |
+				S_038010_SRF_MODE_ALL(V_038010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
 				S_038010_REQUEST_SIZE(1) |
 				S_038010_ENDIAN_SWAP(endian) |
 				S_038010_BASE_LEVEL(state->u.tex.first_level), 0xFFFFFFFF, NULL);
-- 
cgit v1.2.3


From 5705fb1dca061b097e568687101af396ec9e8d4c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 22 May 2011 18:28:09 +0200
Subject: r600g: revert commit 36b322dffd2429130f132f55f68acb1a23ba1658 for now

Disable scaled textures, since they don't work 100% right now.
---
 src/gallium/drivers/r600/r600_state_inlines.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 39c647835a6..3254012ddf5 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -372,7 +372,6 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
 	case PIPE_FORMAT_R32G32_FLOAT:
 	case PIPE_FORMAT_R16G16B16A16_UNORM:
 	case PIPE_FORMAT_R16G16B16A16_SNORM:
-	case PIPE_FORMAT_R16G16B16A16_SSCALED:
 	case PIPE_FORMAT_R16G16B16A16_FLOAT:
 
 		/* 128-bit buffers. */
@@ -473,7 +472,6 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
 	case PIPE_FORMAT_R16G16B16_USCALED:
 	case PIPE_FORMAT_R16G16B16A16_USCALED:
 	case PIPE_FORMAT_R16G16B16_SSCALED:
-	case PIPE_FORMAT_R16G16B16A16_SSCALED:
 	case PIPE_FORMAT_R16G16B16A16_UNORM:
 	case PIPE_FORMAT_R16G16B16A16_SNORM:
 		return V_0280A0_COLOR_16_16_16_16;
-- 
cgit v1.2.3


From 311eb749a1ab7ffd417bc456345d63eba75e3fec Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 22 May 2011 18:36:47 +0200
Subject: [g3dvl] rework resource format handling

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 219 +++++++++++----------------
 1 file changed, 88 insertions(+), 131 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 3887cf6f1b4..4337e083383 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -38,39 +38,42 @@
 #define SCALE_FACTOR_SNORM (32768.0f / 256.0f)
 #define SCALE_FACTOR_SSCALED (1.0f / 256.0f)
 
-static const enum pipe_format const_zscan_source_formats[] = {
-   PIPE_FORMAT_R16_SNORM,
-   PIPE_FORMAT_R16_SSCALED
-};
+struct format_config {
+   enum pipe_format zscan_source_format;
+   enum pipe_format idct_source_format;
+   enum pipe_format mc_source_format;
 
-static const unsigned num_zscan_source_formats =
-   sizeof(const_zscan_source_formats) / sizeof(enum pipe_format);
+   float idct_scale;
+   float mc_scale;
+};
 
-static const enum pipe_format const_idct_source_formats[] = {
-   PIPE_FORMAT_R16G16B16A16_SNORM,
-   PIPE_FORMAT_R16G16B16A16_SSCALED
+static const struct format_config bitstream_format_config[] = {
+   { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED },
+   { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED },
+   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM },
+   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM }
 };
 
-static const unsigned num_idct_source_formats =
-   sizeof(const_idct_source_formats) / sizeof(enum pipe_format);
+static const unsigned num_bitstream_format_configs =
+   sizeof(bitstream_format_config) / sizeof(struct format_config);
 
-static const enum pipe_format const_idct_intermediate_formats[] = {
-   PIPE_FORMAT_R16G16B16A16_FLOAT,
-   PIPE_FORMAT_R16G16B16A16_SNORM,
-   PIPE_FORMAT_R16G16B16A16_SSCALED,
-   PIPE_FORMAT_R32G32B32A32_FLOAT
+static const struct format_config idct_format_config[] = {
+   { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SSCALED },
+   { PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R16G16B16A16_SSCALED, 1.0f, SCALE_FACTOR_SSCALED },
+   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_FLOAT, 1.0f, SCALE_FACTOR_SNORM },
+   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, PIPE_FORMAT_R16G16B16A16_SNORM, 1.0f, SCALE_FACTOR_SNORM }
 };
 
-static const unsigned num_idct_intermediate_formats =
-   sizeof(const_idct_intermediate_formats) / sizeof(enum pipe_format);
+static const unsigned num_idct_format_configs =
+   sizeof(idct_format_config) / sizeof(struct format_config);
 
-static const enum pipe_format const_mc_source_formats[] = {
-   PIPE_FORMAT_R16_SNORM,
-   PIPE_FORMAT_R16_SSCALED
+static const struct format_config mc_format_config[] = {
+   //{ PIPE_FORMAT_R16_SSCALED, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SSCALED, 0.0f, SCALE_FACTOR_SSCALED },
+   { PIPE_FORMAT_R16_SNORM, PIPE_FORMAT_NONE, PIPE_FORMAT_R16_SNORM, 0.0f, SCALE_FACTOR_SNORM }
 };
 
-static const unsigned num_mc_source_formats =
-   sizeof(const_mc_source_formats) / sizeof(enum pipe_format);
+static const unsigned num_mc_format_configs =
+   sizeof(mc_format_config) / sizeof(struct format_config);
 
 static bool
 init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
@@ -627,11 +630,8 @@ init_pipe_state(struct vl_mpeg12_decoder *dec)
    return true;
 }
 
-static enum pipe_format
-find_first_supported_format(struct vl_mpeg12_decoder *dec,
-                            const enum pipe_format formats[],
-                            unsigned num_formats,
-                            enum pipe_texture_target target)
+static const struct format_config*
+find_format_config(struct vl_mpeg12_decoder *dec, const struct format_config configs[], unsigned num_configs)
 {
    struct pipe_screen *screen;
    unsigned i;
@@ -640,16 +640,32 @@ find_first_supported_format(struct vl_mpeg12_decoder *dec,
 
    screen = dec->pipe->screen;
 
-   for (i = 0; i < num_formats; ++i)
-      if (screen->is_format_supported(dec->pipe->screen, formats[i], target, 1,
-                                      PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
-         return formats[i];
+   for (i = 0; i < num_configs; ++i) {
+      if (!screen->is_format_supported(screen, configs[i].zscan_source_format, PIPE_TEXTURE_2D,
+                                       1, PIPE_BIND_SAMPLER_VIEW))
+         continue;
+
+      if (configs[i].idct_source_format != PIPE_FORMAT_NONE) {
+         if (!screen->is_format_supported(screen, configs[i].idct_source_format, PIPE_TEXTURE_2D,
+                                          1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
+            continue;
+
+         if (!screen->is_format_supported(screen, configs[i].mc_source_format, PIPE_TEXTURE_3D,
+                                          1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
+            continue;
+      } else {
+         if (!screen->is_format_supported(screen, configs[i].mc_source_format, PIPE_TEXTURE_2D,
+                                          1, PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET))
+            continue;
+      }
+      return &configs[i];
+   }
 
-   return PIPE_FORMAT_NONE;
+   return NULL;
 }
 
 static bool
-init_zscan(struct vl_mpeg12_decoder *dec)
+init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 {
    unsigned num_channels;
 
@@ -660,12 +676,7 @@ init_zscan(struct vl_mpeg12_decoder *dec)
       (dec->base.width * dec->base.height) /
       (BLOCK_WIDTH * BLOCK_HEIGHT);
 
-   dec->zscan_source_format = find_first_supported_format(dec, const_zscan_source_formats,
-                                                          num_zscan_source_formats, PIPE_TEXTURE_2D);
-
-   if (dec->zscan_source_format == PIPE_FORMAT_NONE)
-      return false;
-
+   dec->zscan_source_format = format_config->zscan_source_format;
    dec->zscan_linear = vl_zscan_layout(dec->pipe, vl_zscan_linear, dec->blocks_per_line);
    dec->zscan_normal = vl_zscan_layout(dec->pipe, vl_zscan_normal, dec->blocks_per_line);
    dec->zscan_alternate = vl_zscan_layout(dec->pipe, vl_zscan_alternate, dec->blocks_per_line);
@@ -684,39 +695,19 @@ init_zscan(struct vl_mpeg12_decoder *dec)
 }
 
 static bool
-init_idct(struct vl_mpeg12_decoder *dec, float *mc_scale)
+init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 {
    unsigned nr_of_idct_render_targets;
    enum pipe_format formats[3];
 
-   struct pipe_sampler_view *matrix, *transpose = NULL;
-   float matrix_scale, transpose_scale;
+   struct pipe_sampler_view *matrix = NULL;
 
    nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS);
 
    // more than 4 render targets usually doesn't makes any seens
    nr_of_idct_render_targets = MIN2(nr_of_idct_render_targets, 4);
 
-   formats[0] = formats[1] = formats[2] = find_first_supported_format(dec, const_idct_source_formats,
-                                                                      num_idct_source_formats, PIPE_TEXTURE_2D);
-
-   switch (formats[0]) {
-   case PIPE_FORMAT_NONE:
-      goto error_idct_format;
-
-   case PIPE_FORMAT_R16G16B16A16_SSCALED:
-      matrix_scale = SCALE_FACTOR_SSCALED;
-      break;
-
-   case PIPE_FORMAT_R16G16B16A16_SNORM:
-      matrix_scale = SCALE_FACTOR_SNORM;
-      break;
-
-   default:
-      assert(0);
-      return false;
-   }
-
+   formats[0] = formats[1] = formats[2] = format_config->idct_source_format;
    dec->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                            dec->base.width / 4, dec->base.height, 1,
                                            dec->base.chroma_format,
@@ -724,31 +715,7 @@ init_idct(struct vl_mpeg12_decoder *dec, float *mc_scale)
    if (!dec->idct_source)
       goto error_idct_source;
 
-   formats[0] = formats[1] = formats[2] = find_first_supported_format(dec, const_idct_intermediate_formats,
-                                                                      num_idct_intermediate_formats, PIPE_TEXTURE_3D);
-
-   switch (formats[0]) {
-   case PIPE_FORMAT_NONE:
-      goto error_mc_format;
-
-   case PIPE_FORMAT_R16G16B16A16_FLOAT:
-   case PIPE_FORMAT_R32G32B32A32_FLOAT:
-      transpose_scale = 1.0f;
-      *mc_scale = 1.0f;
-      break;
-
-   case PIPE_FORMAT_R16_SSCALED:
-      transpose_scale = matrix_scale = sqrt(matrix_scale);
-      transpose_scale /= SCALE_FACTOR_SSCALED;
-      *mc_scale = SCALE_FACTOR_SSCALED;
-      break;
-
-   default:
-      transpose_scale = matrix_scale = sqrt(matrix_scale);
-      *mc_scale = 1.0f;
-      break;
-   }
-
+   formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
    dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                          dec->base.width / nr_of_idct_render_targets,
                                          dec->base.height / 4, nr_of_idct_render_targets,
@@ -758,25 +725,18 @@ init_idct(struct vl_mpeg12_decoder *dec, float *mc_scale)
    if (!dec->mc_source)
       goto error_mc_source;
 
-   if (!(matrix = vl_idct_upload_matrix(dec->pipe, matrix_scale)))
+   if (!(matrix = vl_idct_upload_matrix(dec->pipe, format_config->idct_scale)))
       goto error_matrix;
 
-   if (matrix_scale != transpose_scale) {
-      if (!(transpose = vl_idct_upload_matrix(dec->pipe, transpose_scale)))
-         goto error_transpose;
-   } else
-      pipe_sampler_view_reference(&transpose, matrix);
-
    if (!vl_idct_init(&dec->idct_y, dec->pipe, dec->base.width, dec->base.height,
-                     nr_of_idct_render_targets, matrix, transpose))
+                     nr_of_idct_render_targets, matrix, matrix))
       goto error_y;
 
    if(!vl_idct_init(&dec->idct_c, dec->pipe, dec->chroma_width, dec->chroma_height,
-                    nr_of_idct_render_targets, matrix, transpose))
+                    nr_of_idct_render_targets, matrix, matrix))
       goto error_c;
 
    pipe_sampler_view_reference(&matrix, NULL);
-   pipe_sampler_view_reference(&transpose, NULL);
 
    return true;
 
@@ -784,54 +744,30 @@ error_c:
    vl_idct_cleanup(&dec->idct_y);
 
 error_y:
-   pipe_sampler_view_reference(&transpose, NULL);
-
-error_transpose:
    pipe_sampler_view_reference(&matrix, NULL);
 
 error_matrix:
    dec->mc_source->destroy(dec->mc_source);
 
 error_mc_source:
-error_mc_format:
    dec->idct_source->destroy(dec->idct_source);
 
 error_idct_source:
-error_idct_format:
    return false;
 }
 
 static bool
-init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, float *mc_scale)
+init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 {
    enum pipe_format formats[3];
 
-   formats[0] = formats[1] = formats[2] = find_first_supported_format(dec, const_mc_source_formats,
-                                                                      num_mc_source_formats, PIPE_TEXTURE_2D);
-
-   switch (formats[0]) {
-   case PIPE_FORMAT_NONE:
-      return false;
-
-   case PIPE_FORMAT_R16_SNORM:
-      *mc_scale = SCALE_FACTOR_SNORM;
-      break;
-
-   case PIPE_FORMAT_R16_SSCALED:
-      *mc_scale = SCALE_FACTOR_SSCALED;
-      break;
-
-   default:
-      assert(0);
-      return false;
-   }
-
+   formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
    dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                          dec->base.width, dec->base.height, 1,
                                          dec->base.chroma_format,
                                          formats, PIPE_USAGE_STATIC);
 
-   return dec->mc_source;
+   return dec->mc_source != NULL;
 }
 
 static void
@@ -885,8 +821,8 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height)
 {
+   const struct format_config *format_config;
    struct vl_mpeg12_decoder *dec;
-   float mc_scale;
 
    assert(u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12);
 
@@ -932,23 +868,44 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
       dec->chroma_height = dec->base.height;
    }
 
-   if (!init_zscan(dec))
+   switch (entrypoint) {
+   case PIPE_VIDEO_ENTRYPOINT_BITSTREAM:
+      format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs);
+      break;
+
+   case PIPE_VIDEO_ENTRYPOINT_IDCT:
+      format_config = find_format_config(dec, idct_format_config, num_idct_format_configs);
+      break;
+
+   case PIPE_VIDEO_ENTRYPOINT_MC:
+      format_config = find_format_config(dec, mc_format_config, num_mc_format_configs);
+      break;
+
+   default:
+      assert(0);
+      return NULL;
+   }
+
+   if (!format_config)
+      return NULL;
+
+   if (!init_zscan(dec, format_config))
       goto error_zscan;
 
    if (entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) {
-      if (!init_idct(dec, &mc_scale))
+      if (!init_idct(dec, format_config))
          goto error_sources;
    } else {
-      if (!init_mc_source_widthout_idct(dec, &mc_scale))
+      if (!init_mc_source_widthout_idct(dec, format_config))
          goto error_sources;
    }
 
-   if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, mc_scale,
+   if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, format_config->mc_scale,
                    mc_vert_shader_callback, mc_frag_shader_callback, dec))
       goto error_mc_y;
 
    // TODO
-   if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, mc_scale,
+   if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, format_config->mc_scale,
                    mc_vert_shader_callback, mc_frag_shader_callback, dec))
       goto error_mc_c;
 
-- 
cgit v1.2.3


From 0e886219193472be2203f2afccea84029f014dbd Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 29 May 2011 12:30:58 +0200
Subject: Initial r300g support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based upon a patch from Pali Rohár <pali.rohar@gmail.com>.
This seems to get at least YUV->RGB conversion working.
So a simple "mplayer -vo vdpau" now seems to work fine.
---
 configure.ac                                  |   2 +-
 src/gallium/drivers/r300/Makefile             |   1 +
 src/gallium/drivers/r300/r300_screen.c        |   3 +-
 src/gallium/drivers/r300/r300_video_context.c | 308 +-------------------------
 src/gallium/drivers/r300/r300_video_context.h |  14 +-
 src/gallium/targets/va-r300/Makefile          |  26 +++
 src/gallium/targets/va-r300/target.c          |  24 ++
 src/gallium/targets/vdpau-r300/Makefile       |  27 +++
 src/gallium/targets/vdpau-r300/target.c       |  24 ++
 src/gallium/targets/xvmc-r300/Makefile        |  22 ++
 src/gallium/targets/xvmc-r300/target.c        |  24 ++
 11 files changed, 162 insertions(+), 313 deletions(-)
 create mode 100644 src/gallium/targets/va-r300/Makefile
 create mode 100644 src/gallium/targets/va-r300/target.c
 create mode 100644 src/gallium/targets/vdpau-r300/Makefile
 create mode 100644 src/gallium/targets/vdpau-r300/target.c
 create mode 100644 src/gallium/targets/xvmc-r300/Makefile
 create mode 100644 src/gallium/targets/xvmc-r300/target.c

diff --git a/configure.ac b/configure.ac
index 03f26847c19..371791220e6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1839,7 +1839,7 @@ if test "x$enable_gallium_r300" = xauto; then
     gallium_check_st "radeon/drm" "dri-r300"
 elif test "x$enable_gallium_r300" = xyes; then
     GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS r300"
-    gallium_check_st "radeon/drm" "dri-r300" "xorg-radeon"
+    gallium_check_st "radeon/drm" "dri-r300" "xorg-radeon" "xvmc-r300" "vdpau-r300" "va-r300"
 fi
 
 dnl
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index c9401b99f1c..37b0f01cfd3 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -26,6 +26,7 @@ C_SOURCES = \
 	r300_texture.c \
 	r300_texture_desc.c \
 	r300_tgsi_to_rc.c \
+	r300_video_context.c \
 	r300_transfer.c
 
 LIBRARY_INCLUDES = \
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 7a1366a4f8f..240b841ed2a 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -31,6 +31,7 @@
 #include "r300_screen_buffer.h"
 #include "r300_state_inlines.h"
 #include "r300_public.h"
+#include "r300_video_context.h"
 
 #include "draw/draw_context.h"
 
@@ -511,7 +512,7 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws)
     r300screen->screen.get_paramf = r300_get_paramf;
     r300screen->screen.is_format_supported = r300_is_format_supported;
     r300screen->screen.context_create = r300_create_context;
-
+    r300screen->screen.video_context_create = r300_video_create;
     r300screen->screen.fence_reference = r300_fence_reference;
     r300screen->screen.fence_signalled = r300_fence_signalled;
     r300screen->screen.fence_finish = r300_fence_finish;
diff --git a/src/gallium/drivers/r300/r300_video_context.c b/src/gallium/drivers/r300/r300_video_context.c
index 9fe6d6fcf25..1d5bfefb9dd 100644
--- a/src/gallium/drivers/r300/r300_video_context.c
+++ b/src/gallium/drivers/r300/r300_video_context.c
@@ -19,310 +19,20 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
-/*
- * Authors:
- *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
- */
-
-#include <X11/Xlib.h>
-#include <X11/Xutil.h>
-#include <pipe/p_defines.h>
-#include <pipe/p_context.h>
-#include <pipe/p_screen.h>
-#include <pipe/p_inlines.h>
-#include <util/u_memory.h>
-#include <X11/Xlib.h>
-
-#include <fcntl.h>
-
-#include "radeon_buffer.h"
-#include "radeon_r300.h"
-#include "r300_screen.h"
-#include "r300_texture.h"
-#include "p_video_context.h"
-#include "radeon_vl.h"
-#include "softpipe/sp_winsys.h"
-#include "softpipe/sp_texture.h"
-
+#include <vl/vl_context.h>
+#include <util/u_video.h>
 #include "r300_video_context.h"
-#include <softpipe/sp_video_context.h>
-
-static void r300_mpeg12_destroy(struct pipe_video_context *vpipe)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-
-    assert(vpipe);
-
-    ctx->pipe->bind_vs_state(ctx->pipe, NULL);
-    ctx->pipe->bind_fs_state(ctx->pipe, NULL);
-
-    ctx->pipe->delete_blend_state(ctx->pipe, ctx->blend);
-    ctx->pipe->delete_rasterizer_state(ctx->pipe, ctx->rast);
-    ctx->pipe->delete_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
-
-    pipe_video_surface_reference(&ctx->decode_target, NULL);
-    vl_compositor_cleanup(&ctx->compositor);
-    vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-    ctx->pipe->destroy(ctx->pipe);
-
-    FREE(ctx);
-}
-
-static void
-r300_mpeg12_decode_macroblocks(struct pipe_video_context *vpipe,
-                               struct pipe_video_surface *past,
-                               struct pipe_video_surface *future,
-                               unsigned num_macroblocks,
-                               struct pipe_macroblock *macroblocks,
-                               struct pipe_fence_handle **fence)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-    struct pipe_mpeg12_macroblock *mpeg12_macroblocks =
-                         (struct pipe_mpeg12_macroblock*)macroblocks;
-
-    assert(vpipe);
-    assert(num_macroblocks);
-    assert(macroblocks);
-    assert(macroblocks->codec == PIPE_VIDEO_CODEC_MPEG12);
-    assert(ctx->decode_target);
-
-    vl_mpeg12_mc_renderer_render_macroblocks(
-                            &ctx->mc_renderer,
-                            r300_video_surface(ctx->decode_target)->tex,
-                            past ? r300_video_surface(past)->tex : NULL,
-                            future ? r300_video_surface(future)->tex : NULL,
-                            num_macroblocks, mpeg12_macroblocks, fence);
-}
-
-static void r300_mpeg12_clear_surface(struct pipe_video_context *vpipe,
-                                      unsigned x, unsigned y,
-                                      unsigned width, unsigned height,
-                                      unsigned value,
-                                      struct pipe_surface *surface)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-
-    assert(vpipe);
-    assert(surface);
-
-    if (ctx->pipe->surface_fill)
-        ctx->pipe->surface_fill(ctx->pipe, surface, x, y, width, height, value);
-    else
-        util_surface_fill(ctx->pipe, surface, x, y, width, height, value);
-}
-
-static void
-r300_mpeg12_render_picture(struct pipe_video_context     *vpipe,
-                           struct pipe_video_surface     *src_surface,
-                           enum pipe_mpeg12_picture_type picture_type,
-                           struct pipe_video_rect        *src_area,
-                           struct pipe_surface           *dst_surface,
-                           struct pipe_video_rect        *dst_area,
-                           struct pipe_fence_handle      **fence)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-
-    assert(vpipe);
-    assert(src_surface);
-    assert(src_area);
-    assert(dst_surface);
-    assert(dst_area);
-
-    vl_compositor_render(&ctx->compositor,
-                         r300_video_surface(src_surface)->tex,
-                         picture_type, src_area, dst_surface->texture,
-                         dst_area, fence);
-}
-
-static void r300_mpeg12_set_decode_target(struct pipe_video_context *vpipe,
-                                          struct pipe_video_surface *dt)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-
-    assert(vpipe);
-    assert(dt);
-
-    pipe_video_surface_reference(&ctx->decode_target, dt);
-}
-
-static void r300_mpeg12_set_csc_matrix(struct pipe_video_context *vpipe,
-                                       const float *mat)
-{
-    struct radeon_mpeg12_context *ctx = (struct radeon_mpeg12_context*)vpipe;
-
-    assert(vpipe);
-
-    vl_compositor_set_csc_matrix(&ctx->compositor, mat);
-}
-
-static bool r300_mpeg12_init_pipe_state(struct radeon_mpeg12_context *ctx)
-{
-    struct pipe_rasterizer_state rast;
-    struct pipe_blend_state blend;
-    struct pipe_depth_stencil_alpha_state dsa;
-    unsigned i;
-
-    assert(ctx);
-
-    rast.flatshade = 1;
-    rast.flatshade_first = 0;
-    rast.light_twoside = 0;
-    rast.front_winding = PIPE_WINDING_CCW;
-    rast.cull_mode = PIPE_WINDING_CW;
-    rast.fill_cw = PIPE_POLYGON_MODE_FILL;
-    rast.fill_ccw = PIPE_POLYGON_MODE_FILL;
-    rast.offset_cw = 0;
-    rast.offset_ccw = 0;
-    rast.scissor = 0;
-    rast.poly_smooth = 0;
-    rast.poly_stipple_enable = 0;
-    rast.point_sprite = 0;
-    rast.point_size_per_vertex = 0;
-    rast.multisample = 0;
-    rast.line_smooth = 0;
-    rast.line_stipple_enable = 0;
-    rast.line_stipple_factor = 0;
-    rast.line_stipple_pattern = 0;
-    rast.line_last_pixel = 0;
-    rast.bypass_vs_clip_and_viewport = 0;
-    rast.line_width = 1;
-    rast.point_smooth = 0;
-    rast.point_size = 1;
-    rast.offset_units = 1;
-    rast.offset_scale = 1;
-    /*rast.sprite_coord_mode[i] = ;*/
-    ctx->rast = ctx->pipe->create_rasterizer_state(ctx->pipe, &rast);
-    ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rast);
-
-    blend.blend_enable = 0;
-    blend.rgb_func = PIPE_BLEND_ADD;
-    blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
-    blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE;
-    blend.alpha_func = PIPE_BLEND_ADD;
-    blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
-    blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE;
-    blend.logicop_enable = 0;
-    blend.logicop_func = PIPE_LOGICOP_CLEAR;
-    /* Needed to allow color writes to FB, even if blending disabled */
-    blend.colormask = PIPE_MASK_RGBA;
-    blend.dither = 0;
-    ctx->blend = ctx->pipe->create_blend_state(ctx->pipe, &blend);
-    ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend);
-
-    dsa.depth.enabled = 0;
-    dsa.depth.writemask = 0;
-    dsa.depth.func = PIPE_FUNC_ALWAYS;
-    for (i = 0; i < 2; ++i)
-    {
-        dsa.stencil[i].enabled = 0;
-        dsa.stencil[i].func = PIPE_FUNC_ALWAYS;
-        dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP;
-        dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP;
-        dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP;
-        dsa.stencil[i].ref_value = 0;
-        dsa.stencil[i].valuemask = 0;
-        dsa.stencil[i].writemask = 0;
-    }
-    dsa.alpha.enabled = 0;
-    dsa.alpha.func = PIPE_FUNC_ALWAYS;
-    dsa.alpha.ref_value = 0;
-    ctx->dsa = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &dsa);
-    ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->dsa);
-
-    return true;
-}
-
-static struct pipe_video_context *
-r300_mpeg12_context_create(struct pipe_screen *screen,
-                           enum pipe_video_profile profile,
-                           enum pipe_video_chroma_format chroma_format,
-                           unsigned int width,
-                           unsigned int height)
-{
-    struct radeon_mpeg12_context *ctx;
-    ctx = CALLOC_STRUCT(radeon_mpeg12_context);
-    if (!ctx)
-        return NULL;
-
-    ctx->base.profile       = profile;
-    ctx->base.chroma_format = chroma_format;
-    ctx->base.width         = width;
-    ctx->base.height        = height;
-    ctx->base.screen        = screen;
-
-    ctx->base.destroy               = radeon_mpeg12_destroy;
-    ctx->base.decode_macroblocks    = radeon_mpeg12_decode_macroblocks;
-    ctx->base.clear_surface         = radeon_mpeg12_clear_surface;
-    ctx->base.render_picture        = radeon_mpeg12_render_picture;
-    ctx->base.set_decode_target     = radeon_mpeg12_set_decode_target;
-    ctx->base.set_csc_matrix        = radeon_mpeg12_set_csc_matrix;
-
-    ctx->pipe = r300_create_context(screen,(struct r300_winsys*)screen->winsys);
-    if (!ctx->pipe)
-    {
-        FREE(ctx);
-        return NULL;
-    }
-
-    if (!vl_mpeg12_mc_renderer_init(&ctx->mc_renderer, ctx->pipe,
-                                   width, height, chroma_format,
-                                   VL_MPEG12_MC_RENDERER_BUFFER_PICTURE,
-                                   true))
-    {
-        ctx->pipe->destroy(ctx->pipe);
-        FREE(ctx);
-        return NULL;
-    }
-
-    if (!vl_compositor_init(&ctx->compositor, ctx->pipe))
-    {
-        vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-        ctx->pipe->destroy(ctx->pipe);
-        FREE(ctx);
-        return NULL;
-    }
-
-    if (!radeon_mpeg12_init_pipe_state(ctx))
-    {
-        vl_compositor_cleanup(&ctx->compositor);
-        vl_mpeg12_mc_renderer_cleanup(&ctx->mc_renderer);
-        ctx->pipe->destroy(ctx->pipe);
-        FREE(ctx);
-        return NULL;
-    }
-
-    return &ctx->base;
-}
 
 struct pipe_video_context *
-r300_video_create(struct pipe_context *pipe, enum pipe_video_profile profile,
-                  enum pipe_video_chroma_format chroma_format,
-                  unsigned width, unsigned height,
-                  unsigned pvctx_id)
+r300_video_create(struct pipe_screen *screen, void *priv)
 {
-    struct pipe_video_context *vpipe;
-    struct radeon_vl_context *rvl_ctx;
-
-    assert(p_screen);
-    assert(width && height);
-
-    /* create radeon pipe_context */
-    switch(u_reduce_video_profile(profile))
-    {
-        case PIPE_VIDEO_CODEC_MPEG12:
-            vpipe = radeon_mpeg12_context_create(p_screen, profile, chr_f,
-                                                 width, height);
-            break;
-        default:
-            return NULL;
-    }
+   struct pipe_context *pipe;
 
-    /* create radeon_vl_context */
-    rvl_ctx = calloc(1, sizeof(struct radeon_vl_context));
-    rvl_ctx->display = display;
-    rvl_ctx->screen = screen;
+   assert(screen);
 
-    vpipe->priv = rvl_ctx;
+   pipe = screen->context_create(screen, priv);
+   if (!pipe)
+      return NULL;
 
-    return vpipe;
+   return vl_create_context(pipe, false);
 }
diff --git a/src/gallium/drivers/r300/r300_video_context.h b/src/gallium/drivers/r300/r300_video_context.h
index a8210ba7b71..aaae14cec47 100644
--- a/src/gallium/drivers/r300/r300_video_context.h
+++ b/src/gallium/drivers/r300/r300_video_context.h
@@ -19,22 +19,12 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
-/*
- * Authors:
- *   CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
- */
-
 #ifndef __R300_VIDEO_CONTEXT_H__
 #define __R300_VIDEO_CONTEXT_H__
 
 #include <pipe/p_video_context.h>
 
-struct pipe_context;
-
-struct pipe_video_context*
-r300_video_create(struct pipe_context *pipe, enum pipe_video_profile profile,
-                  enum pipe_video_chroma_format chroma_format,
-                  unsigned width, unsigned height,
-                  unsigned pvctx_id);
+struct pipe_video_context *
+r300_video_create(struct pipe_screen *screen, void *priv);
 
 #endif
diff --git a/src/gallium/targets/va-r300/Makefile b/src/gallium/targets/va-r300/Makefile
new file mode 100644
index 00000000000..55c950450b9
--- /dev/null
+++ b/src/gallium/targets/va-r300/Makefile
@@ -0,0 +1,26 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBBASENAME = r300_drv_video
+
+DRIVER_DEFINES = -DGALLIUM_SOFTPIPE
+DRIVER_INCLUDES =
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/drivers/r300/libr300.a \
+	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
+        $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
+	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+        $(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/auxiliary/libgallium.a
+
+C_SOURCES = \
+	target.c \
+	$(COMMON_GALLIUM_SOURCES) \
+	$(DRIVER_SOURCES)
+
+DRIVER_LIBS = $(shell pkg-config libdrm_radeon --libs) -lXfixes
+
+include ../Makefile.va
+
+symlinks:
diff --git a/src/gallium/targets/va-r300/target.c b/src/gallium/targets/va-r300/target.c
new file mode 100644
index 00000000000..9f673bf17e6
--- /dev/null
+++ b/src/gallium/targets/va-r300/target.c
@@ -0,0 +1,24 @@
+#include "state_tracker/drm_driver.h"
+#include "target-helpers/inline_debug_helper.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r300/r300_public.h"
+
+static struct pipe_screen *create_screen(int fd)
+{
+   struct radeon_winsys *radeon;
+   struct pipe_screen *screen;
+
+   radeon = radeon_drm_winsys_create(fd);
+   if (!radeon)
+      return NULL;
+
+   screen = r300_screen_create(radeon);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+DRM_DRIVER_DESCRIPTOR("r300", "radeon", create_screen)
diff --git a/src/gallium/targets/vdpau-r300/Makefile b/src/gallium/targets/vdpau-r300/Makefile
new file mode 100644
index 00000000000..4fc1291a9f2
--- /dev/null
+++ b/src/gallium/targets/vdpau-r300/Makefile
@@ -0,0 +1,27 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBBASENAME = vdpau_r300
+
+
+PIPE_DRIVERS = \
+        $(TOP)/src/gallium/drivers/r300/libr300.a \
+	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
+        $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
+        $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+        $(TOP)/src/gallium/drivers/rbug/librbug.a \
+        $(TOP)/src/gallium/drivers/trace/libtrace.a \
+        $(TOP)/src/gallium/drivers/galahad/libgalahad.a \
+	$(TOP)/src/gallium/auxiliary/libgallium.a \
+	$(TOP)/src/mesa/libmesagallium.a
+
+C_SOURCES = \
+	target.c \
+	$(COMMON_GALLIUM_SOURCES) \
+	$(DRIVER_SOURCES)
+
+DRIVER_LIBS = $(shell pkg-config libdrm_radeon --libs) -lXfixes $(LLVM_LIBS)
+
+include ../Makefile.vdpau
+
+symlinks:
diff --git a/src/gallium/targets/vdpau-r300/target.c b/src/gallium/targets/vdpau-r300/target.c
new file mode 100644
index 00000000000..9f673bf17e6
--- /dev/null
+++ b/src/gallium/targets/vdpau-r300/target.c
@@ -0,0 +1,24 @@
+#include "state_tracker/drm_driver.h"
+#include "target-helpers/inline_debug_helper.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r300/r300_public.h"
+
+static struct pipe_screen *create_screen(int fd)
+{
+   struct radeon_winsys *radeon;
+   struct pipe_screen *screen;
+
+   radeon = radeon_drm_winsys_create(fd);
+   if (!radeon)
+      return NULL;
+
+   screen = r300_screen_create(radeon);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+DRM_DRIVER_DESCRIPTOR("r300", "radeon", create_screen)
diff --git a/src/gallium/targets/xvmc-r300/Makefile b/src/gallium/targets/xvmc-r300/Makefile
new file mode 100644
index 00000000000..4998f4dcab4
--- /dev/null
+++ b/src/gallium/targets/xvmc-r300/Makefile
@@ -0,0 +1,22 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBBASENAME = XvMCr300
+
+PIPE_DRIVERS = \
+        $(TOP)/src/gallium/drivers/r300/libr300.a \
+	$(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
+        $(TOP)/src/gallium/winsys/radeon/drm/libradeonwinsys.a \
+        $(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/auxiliary/libgallium.a
+
+C_SOURCES = \
+	target.c \
+	$(COMMON_GALLIUM_SOURCES) \
+	$(DRIVER_SOURCES)
+
+DRIVER_LIBS = $(shell pkg-config libdrm_radeon --libs) -lXfixes
+
+include ../Makefile.xvmc
+
+symlinks:
diff --git a/src/gallium/targets/xvmc-r300/target.c b/src/gallium/targets/xvmc-r300/target.c
new file mode 100644
index 00000000000..9f673bf17e6
--- /dev/null
+++ b/src/gallium/targets/xvmc-r300/target.c
@@ -0,0 +1,24 @@
+#include "state_tracker/drm_driver.h"
+#include "target-helpers/inline_debug_helper.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r300/r300_public.h"
+
+static struct pipe_screen *create_screen(int fd)
+{
+   struct radeon_winsys *radeon;
+   struct pipe_screen *screen;
+
+   radeon = radeon_drm_winsys_create(fd);
+   if (!radeon)
+      return NULL;
+
+   screen = r300_screen_create(radeon);
+   if (!screen)
+      return NULL;
+
+   screen = debug_screen_wrap(screen);
+
+   return screen;
+}
+
+DRM_DRIVER_DESCRIPTOR("r300", "radeon", create_screen)
-- 
cgit v1.2.3


From b4837a53d365bd1bd5f50b3fb40910eeaac869a5 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 22 May 2011 23:15:09 +0200
Subject: [g3dvl] fix resource type for idct

Only the intermediate resources are 3D textures
---
 src/gallium/auxiliary/vl/vl_idct.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 11f935afd73..602258ece86 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -86,11 +86,9 @@ calc_addr(struct ureg_program *shader, struct ureg_dst addr[2],
     */
    ureg_MOV(shader, ureg_writemask(addr[0], wm_start), ureg_scalar(start, sw_start));
    ureg_MOV(shader, ureg_writemask(addr[0], wm_tc), ureg_scalar(tc, sw_tc));
-   ureg_MOV(shader, ureg_writemask(addr[0], TGSI_WRITEMASK_Z), tc);
 
    ureg_ADD(shader, ureg_writemask(addr[1], wm_start), ureg_scalar(start, sw_start), ureg_imm1f(shader, 1.0f / size));
    ureg_MOV(shader, ureg_writemask(addr[1], wm_tc), ureg_scalar(tc, sw_tc));
-   ureg_MOV(shader, ureg_writemask(addr[1], TGSI_WRITEMASK_Z), tc);
 }
 
 static void
@@ -113,10 +111,11 @@ increment_addr(struct ureg_program *shader, struct ureg_dst daddr[2],
 }
 
 static void
-fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2], struct ureg_src sampler)
+fetch_four(struct ureg_program *shader, struct ureg_dst m[2], struct ureg_src addr[2],
+           struct ureg_src sampler, bool resource3d)
 {
-   ureg_TEX(shader, m[0], TGSI_TEXTURE_3D, addr[0], sampler);
-   ureg_TEX(shader, m[1], TGSI_TEXTURE_3D, addr[1], sampler);
+   ureg_TEX(shader, m[0], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[0], sampler);
+   ureg_TEX(shader, m[1], resource3d ? TGSI_TEXTURE_3D : TGSI_TEXTURE_2D, addr[1], sampler);
 }
 
 static void
@@ -188,9 +187,6 @@ create_stage1_vert_shader(struct vl_idct *idct)
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(t_tex));
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
 
-   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_Z),
-      ureg_scalar(vrect, TGSI_SWIZZLE_X),
-      ureg_imm1f(shader, BLOCK_WIDTH / idct->nr_of_render_targets));
    ureg_MUL(shader, ureg_writemask(t_start, TGSI_WRITEMASK_XY), vpos, scale);
 
    calc_addr(shader, o_l_addr, ureg_src(t_tex), ureg_src(t_start), false, false, idct->buffer_width / 4);
@@ -243,14 +239,14 @@ create_stage1_frag_shader(struct vl_idct *idct)
 
    for (i = 0; i < 4; ++i) {
       struct ureg_src s_addr[2] = { ureg_src(l[i][0]), ureg_src(l[i][1]) };
-      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1));
+      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1), false);
    }
 
    for (i = 0; i < idct->nr_of_render_targets; ++i) {
       increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, BLOCK_HEIGHT);
 
       struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
-      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0));
+      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0), false);
 
       for (j = 0; j < 4; ++j) {
          matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
@@ -302,6 +298,9 @@ vl_idct_stage2_vert_shader(struct vl_idct *idct, struct ureg_program *shader,
 
    calc_addr(shader, o_l_addr, vrect, ureg_imm1f(shader, 0.0f), false, false, BLOCK_WIDTH / 4);
    calc_addr(shader, o_r_addr, ureg_src(tex), ureg_src(t_start), true, false, idct->buffer_height / 4);
+
+   ureg_MOV(shader, ureg_writemask(o_r_addr[0], TGSI_WRITEMASK_Z), ureg_src(tex));
+   ureg_MOV(shader, ureg_writemask(o_r_addr[1], TGSI_WRITEMASK_Z), ureg_src(tex));
 }
 
 void
@@ -325,8 +324,8 @@ vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
    r[0] = ureg_DECL_temporary(shader);
    r[1] = ureg_DECL_temporary(shader);
 
-   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0));
-   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1));
+   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0), false);
+   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1), true);
 
    matrix_mul(shader, fragment, l, r);
 
-- 
cgit v1.2.3


From ae56a1dd67040dc5d53f4a1622f775462f0fec05 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 26 May 2011 01:50:44 +0200
Subject: [g3dvl] implement mismatch control inside idct shaders

---
 src/gallium/auxiliary/vl/vl_idct.c             | 210 ++++++++++++++++++++++---
 src/gallium/auxiliary/vl/vl_idct.h             |  11 +-
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c |  15 --
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |   8 +-
 4 files changed, 198 insertions(+), 46 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 602258ece86..45180499e2e 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -138,6 +138,121 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2
    ureg_release_temporary(shader, tmp);
 }
 
+static void *
+create_mismatch_vert_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src vrect, vpos;
+   struct ureg_src scale;
+   struct ureg_dst t_tex;
+   struct ureg_dst o_vpos, o_addr[2];
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_tex = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
+   o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
+
+   /*
+    * Vertex shader of the mismatch control pass.
+    *
+    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (buffer_width, buffer_height)
+    * o_vpos.xy = vpos * scale + scale, o_vpos.zw = 1.0
+    *
+    * t_tex.xy = vpos * scale
+    * o_addr = calc_addr(t_tex, ...)
+    */
+
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / idct->buffer_width,
+      (float)BLOCK_HEIGHT / idct->buffer_height);
+
+   ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+
+   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
+   calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);
+
+   ureg_release_temporary(shader, t_tex);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void *
+create_mismatch_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+
+   struct ureg_src addr[2];
+
+   struct ureg_dst m[8][2];
+   struct ureg_dst fragment;
+
+   unsigned i;
+   /* fragment shader of the mismatch control pass; samples sampler 0 only */
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   for (i = 0; i < 8; ++i) {
+      m[i][0] = ureg_DECL_temporary(shader);
+      m[i][1] = ureg_DECL_temporary(shader);
+   }
+   /* m[i] = addr stepped by i via increment_addr() -- presumably one block row per i */
+   for (i = 0; i < 8; ++i) {
+      increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
+   }
+   /* each m[i] is used as the fetch address and then receives the fetch result */
+   for (i = 0; i < 8; ++i) {
+      struct ureg_src s_addr[2] = { ureg_src(m[i][0]), ureg_src(m[i][1]) };
+      fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
+   }
+   /* accumulate m[1..7] into m[0], component-wise */
+   for (i = 1; i < 8; ++i) {
+      ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
+      ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
+   }
+   /* m[0][0] = DP4(|m[0][0] + m[0][1]|, 1 << 14) */
+   ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
+   ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));
+   /* .w = |m[7][1].w| * (1 << 14); then m[0][0] = (0.5 > |frac(m[0][0])|) ? 1.0 : 0.0 */
+   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
+   ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
+   ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));
+   /* .w = (.w > 0 ? +2^-15 : -2^-15) * .x */
+   ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
+            ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
+   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
+            ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));
+   /* xyz pass m[7][1] through unchanged; w is m[7][1].w plus the value computed above */
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
+   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));
+
+   for (i = 0; i < 8; ++i) {
+      ureg_release_temporary(shader, m[i][0]);
+      ureg_release_temporary(shader, m[i][1]);
+   }
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
 static void *
 create_stage1_vert_shader(struct vl_idct *idct)
 {
@@ -239,14 +354,14 @@ create_stage1_frag_shader(struct vl_idct *idct)
 
    for (i = 0; i < 4; ++i) {
       struct ureg_src s_addr[2] = { ureg_src(l[i][0]), ureg_src(l[i][1]) };
-      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1), false);
+      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
    }
 
    for (i = 0; i < idct->nr_of_render_targets; ++i) {
       increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, BLOCK_HEIGHT);
 
       struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
-      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0), false);
+      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);
 
       for (j = 0; j < 4; ++j) {
          matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
@@ -324,8 +439,8 @@ vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
    r[0] = ureg_DECL_temporary(shader);
    r[1] = ureg_DECL_temporary(shader);
 
-   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0), false);
-   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1), true);
+   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
+   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);
 
    matrix_mul(shader, fragment, l, r);
 
@@ -338,6 +453,14 @@ vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
 static bool
 init_shaders(struct vl_idct *idct)
 {
+   idct->vs_mismatch = create_mismatch_vert_shader(idct);
+   if (!idct->vs_mismatch)
+      goto error_vs_mismatch;
+
+   idct->fs_mismatch = create_mismatch_frag_shader(idct);
+   if (!idct->fs_mismatch)
+      goto error_fs_mismatch;
+
    idct->vs = create_stage1_vert_shader(idct);
    if (!idct->vs)
       goto error_vs;
@@ -352,12 +475,20 @@ error_fs:
    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
 
 error_vs:
+   /* unwind in reverse creation order: fs_mismatch first, then vs_mismatch */
+   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
+
+error_fs_mismatch:
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
+error_vs_mismatch:
    return false;
 }
 
 static void
 cleanup_shaders(struct vl_idct *idct)
 {
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
+   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->fs);
 }
@@ -373,6 +504,7 @@ init_state(struct vl_idct *idct)
    assert(idct);
 
    memset(&rs_state, 0, sizeof(rs_state));
+   rs_state.point_size = 1;
    rs_state.gl_rasterization_rules = true;
    idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
    if (!idct->rs_state)
@@ -441,6 +573,59 @@ cleanup_state(struct vl_idct *idct)
    idct->pipe->delete_blend_state(idct->pipe, idct->blend);
 }
 
+/*
+ * Prepare the state for the mismatch control pass of one buffer: a
+ * single-cbuf framebuffer and a viewport, both sized like the texture
+ * behind the buffer's source sampler view.
+ *
+ * Returns false if the surface for that texture could not be created.
+ */
+static bool
+init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   struct pipe_resource *tex;
+   struct pipe_surface surf_templ;
+
+   assert(idct && buffer);
+
+   tex = buffer->sampler_views.individual.source->texture;
+
+   buffer->fb_state_mismatch.width = tex->width0;
+   buffer->fb_state_mismatch.height = tex->height0;
+   buffer->fb_state_mismatch.nr_cbufs = 1;
+
+   memset(&surf_templ, 0, sizeof(surf_templ));
+   surf_templ.format = tex->format;
+   surf_templ.u.tex.first_layer = 0;
+   surf_templ.u.tex.last_layer = 0;
+   surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);
+   /* fb_state_mismatch is bound as-is when flushing; never leave a NULL cbuf in it */
+   if (!buffer->fb_state_mismatch.cbufs[0])
+      return false;
+
+   buffer->viewport_mismatch.scale[0] = tex->width0;
+   buffer->viewport_mismatch.scale[1] = tex->height0;
+   buffer->viewport_mismatch.scale[2] = 1;
+   buffer->viewport_mismatch.scale[3] = 1;
+
+   return true;
+}
+
+/*
+ * Release what the buffer holds for its source: the mismatch control
+ * surface and the reference to the source sampler view.
+ */
+static void
+cleanup_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   assert(idct && buffer);
+
+   pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);
+
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL);
+}
+
 static bool
 init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
@@ -470,6 +641,8 @@ init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    buffer->viewport.scale[0] = tex->width0;
    buffer->viewport.scale[1] = tex->height0;
+   buffer->viewport.scale[2] = 1;
+   buffer->viewport.scale[3] = 1;
 
    return true;
 
@@ -609,13 +782,11 @@ vl_idct_cleanup(struct vl_idct *idct)
 bool
 vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
                     struct pipe_sampler_view *source,
-                    struct pipe_sampler_view *intermediate,
-                    struct pipe_surface *destination)
+                    struct pipe_sampler_view *intermediate)
 {
    assert(buffer);
    assert(idct);
    assert(source);
-   assert(destination);
 
    memset(buffer, 0, sizeof(struct vl_idct_buffer));
 
@@ -624,15 +795,11 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
    pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
 
-   if (!init_intermediate(idct, buffer))
+   if (!init_source(idct, buffer))
       return false;
 
-   buffer->viewport.scale[2] = 1;
-   buffer->viewport.scale[3] = 1;
-   buffer->viewport.translate[0] = 0;
-   buffer->viewport.translate[1] = 0;
-   buffer->viewport.translate[2] = 0;
-   buffer->viewport.translate[3] = 0;
+   if (!init_intermediate(idct, buffer))
+      return false;
 
    return true;
 }
@@ -640,13 +807,9 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
 void
 vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   unsigned i;
-
    assert(idct && buffer);
 
-   for(i = 0; i < idct->nr_of_render_targets; ++i)
-      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
-
+   cleanup_source(idct, buffer);
    cleanup_intermediate(idct, buffer);
 }
 
@@ -659,11 +822,18 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
    idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
    idct->pipe->bind_blend_state(idct->pipe, idct->blend);
    idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
+   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
+
+   /* mismatch control */
+   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);
+   idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport_mismatch);
+   idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);
+   idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);
+   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);
 
    /* first stage */
    idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
    idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport);
-   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
    idct->pipe->bind_vs_state(idct->pipe, idct->vs);
    idct->pipe->bind_fs_state(idct->pipe, idct->fs);
    util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index f5a1e5d9b73..119a53dbf27 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -48,6 +48,7 @@ struct vl_idct
 
    void *samplers[2];
 
+   void *vs_mismatch, *fs_mismatch;
    void *vs, *fs;
 
    struct pipe_sampler_view *matrix;
@@ -57,7 +58,10 @@ struct vl_idct
 /* a set of buffers to work with */
 struct vl_idct_buffer
 {
+   struct pipe_viewport_state viewport_mismatch;
    struct pipe_viewport_state viewport;
+
+   struct pipe_framebuffer_state fb_state_mismatch;
    struct pipe_framebuffer_state fb_state;
 
    union
@@ -65,8 +69,8 @@ struct vl_idct_buffer
       struct pipe_sampler_view *all[4];
       struct pipe_sampler_view *stage[2][2];
       struct {
-         struct pipe_sampler_view *matrix, *source;
-         struct pipe_sampler_view *transpose, *intermediate;
+         struct pipe_sampler_view *source, *matrix;
+         struct pipe_sampler_view *intermediate, *transpose;
       } individual;
    } sampler_views;
 };
@@ -99,8 +103,7 @@ vl_idct_cleanup(struct vl_idct *idct);
 bool
 vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
                     struct pipe_sampler_view *source,
-                    struct pipe_sampler_view *intermediate,
-                    struct pipe_surface *destination);
+                    struct pipe_sampler_view *intermediate);
 
 /* cleanup a buffer of an idct instance */
 void
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 508bb9fab19..bf9b6cd6b11 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -724,11 +724,9 @@ static inline void
 get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
    int i, val;
-   int mismatch;
    const DCTtab *tab;
 
    i = 0;
-   mismatch = ~dest[0];
 
    vl_vlc_needbits(&bs->vlc);
 
@@ -751,7 +749,6 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -778,7 +775,6 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -811,7 +807,6 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       break;	/* illegal, check needed to avoid buffer overflow */
    }
 
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
 }
 
@@ -819,11 +814,9 @@ static inline void
 get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
    int i, val;
-   int mismatch;
    const DCTtab * tab;
 
    i = 0;
-   mismatch = ~dest[0];
 
    vl_vlc_needbits(&bs->vlc);
 
@@ -845,7 +838,6 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
             SATURATE (val);
             dest[i] = val;
-            mismatch ^= val;
 
             bs->vlc.buf <<= 1;
             vl_vlc_needbits(&bs->vlc);
@@ -871,7 +863,6 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
             SATURATE (val);
             dest[i] = val;
-            mismatch ^= val;
 
             vl_vlc_dumpbits(&bs->vlc, 12);
             vl_vlc_needbits(&bs->vlc);
@@ -905,7 +896,6 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       break;	/* illegal, check needed to avoid buffer overflow */
    }
 
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 4);	/* dump end of block code */
 }
 
@@ -913,11 +903,9 @@ static inline void
 get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
    int i, val;
-   int mismatch;
    const DCTtab *tab;
 
    i = -1;
-   mismatch = 1;
 
    vl_vlc_needbits(&bs->vlc);
    if (bs->vlc.buf >= 0x28000000) {
@@ -946,7 +934,6 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -977,7 +964,6 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -1009,7 +995,6 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       }
       break;	/* illegal, check needed to avoid buffer overflow */
    }
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 2);	/* dump end of block code */
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 4337e083383..f96d7f0e2b3 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -147,7 +147,6 @@ static bool
 init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 {
    struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
-   struct pipe_surface **idct_surfaces;
 
    struct vl_mpeg12_decoder *dec;
 
@@ -165,14 +164,10 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
    if (!mc_source_sv)
       goto error_mc_source_sv;
 
-   idct_surfaces = dec->mc_source->get_surfaces(dec->mc_source);
-   if (!idct_surfaces)
-      goto error_surfaces;
-
    for (i = 0; i < 3; ++i)
       if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
                                &buffer->idct[i], idct_source_sv[i],
-                               mc_source_sv[i], idct_surfaces[i]))
+                               mc_source_sv[i]))
          goto error_plane;
 
    return true;
@@ -181,7 +176,6 @@ error_plane:
    for (; i > 0; --i)
       vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
 
-error_surfaces:
 error_mc_source_sv:
 error_source_sv:
    return false;
-- 
cgit v1.2.3


From 912dc8ff09cd7c28926762c2e562de5a99d3e27a Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 29 May 2011 19:53:45 +0200
Subject: [g3dvl] move quantification into shaders

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 107 +++++++------------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |  23 ++++-
 src/gallium/auxiliary/vl/vl_zscan.c            | 138 +++++++++++++++++++------
 src/gallium/auxiliary/vl/vl_zscan.h            |  12 +--
 4 files changed, 168 insertions(+), 112 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index bf9b6cd6b11..e7fbc31ce24 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -55,7 +55,6 @@
 #include <pipe/p_video_state.h>
 
 #include "vl_vlc.h"
-#include "vl_zscan.h"
 #include "vl_mpeg12_bitstream.h"
 
 /* take num bits from the high part of bit_buf and zero extend them */
@@ -64,12 +63,6 @@
 /* take num bits from the high part of bit_buf and sign extend them */
 #define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num)))
 
-#define SATURATE(val)			\
-do {					\
-   if ((uint32_t)(val + 2048) > 4095)	\
-      val = (val > 0) ? 2047 : -2048;	\
-} while (0)
-
 /* macroblock modes */
 #define MACROBLOCK_INTRA 1
 #define MACROBLOCK_PATTERN 2
@@ -721,7 +714,7 @@ get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs)
 }
 
 static inline void
-get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
+get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
 {
    int i, val;
    const DCTtab *tab;
@@ -742,12 +735,10 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       normal_code:
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
+         val = tab->level * quantizer_scale;
 
-         /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
-         SATURATE (val);
          dest[i] = val;
 
          bs->vlc.buf <<= 1;
@@ -771,9 +762,8 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
-         val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16;
+         val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
 
-         SATURATE (val);
          dest[i] = val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
@@ -811,7 +801,7 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 }
 
 static inline void
-get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
+get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
 {
    int i, val;
    const DCTtab * tab;
@@ -831,12 +821,10 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
          normal_code:
             bs->vlc.buf <<= tab->len;
             bs->vlc.bits += tab->len + 1;
-            val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
+            val = tab->level * quantizer_scale;
 
-            /* if (bitstream_get (1)) val = -val; */
             val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
-            SATURATE (val);
             dest[i] = val;
 
             bs->vlc.buf <<= 1;
@@ -859,9 +847,8 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
             vl_vlc_dumpbits(&bs->vlc, 12);
             vl_vlc_needbits(&bs->vlc);
-            val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16;
+            val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale;
 
-            SATURATE (val);
             dest[i] = val;
 
             vl_vlc_dumpbits(&bs->vlc, 12);
@@ -900,7 +887,7 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 }
 
 static inline void
-get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
+get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
 {
    int i, val;
    const DCTtab *tab;
@@ -927,12 +914,10 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       normal_code:
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5;
+         val = ((2*tab->level+1) * quantizer_scale) >> 1;
 
-         /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
-         SATURATE (val);
          dest[i] = val;
 
          bs->vlc.buf <<= 1;
@@ -960,9 +945,8 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
          val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1;
-         val = (val * quantizer_scale * quant_matrix[i]) / 32;
+         val = (val * quantizer_scale) / 2;
 
-         SATURATE (val);
          dest[i] = val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
@@ -999,7 +983,7 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 }
 
 static inline void
-get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
+get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
 {
    int i, val;
    const DCTtab * tab;
@@ -1020,7 +1004,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
       normal_code:
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
+         val = tab->level * quantizer_scale;
 
          /* oddification */
          val = (val - 1) | 1;
@@ -1028,7 +1012,6 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
          /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
-         SATURATE (val);
          dest[i] = val;
 
          bs->vlc.buf <<= 1;
@@ -1057,12 +1040,11 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
             vl_vlc_dumpbits(&bs->vlc, 8);
             val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
          }
-         val = (val * quantizer_scale * quant_matrix[i]) / 16;
+         val = val * quantizer_scale;
 
          /* oddification */
          val = (val + ~SBITS (val, 1)) | 1;
 
-         SATURATE (val);
          dest[i] = val;
 
          vl_vlc_dumpbits(&bs->vlc, 8);
@@ -1099,7 +1081,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu
 }
 
 static inline void
-get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
+get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest)
 {
    int i, val;
    const DCTtab * tab;
@@ -1126,7 +1108,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
       normal_code:
          bs->vlc.buf <<= tab->len;
          bs->vlc.bits += tab->len + 1;
-         val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5;
+         val = ((2*tab->level+1) * quantizer_scale) >> 1;
 
          /* oddification */
          val = (val - 1) | 1;
@@ -1134,7 +1116,6 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
          /* if (bitstream_get (1)) val = -val; */
          val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1);
 
-         SATURATE (val);
          dest[i] = val;
 
          bs->vlc.buf <<= 1;
@@ -1167,12 +1148,11 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
             val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val;
          }
          val = 2 * (val + SBITS (val, 1)) + 1;
-         val = (val * quantizer_scale * quant_matrix[i]) / 32;
+         val = (val * quantizer_scale) / 2;
 
          /* oddification */
          val = (val + ~SBITS (val, 1)) | 1;
 
-         SATURATE (val);
          dest[i] = val;
 
          vl_vlc_dumpbits(&bs->vlc, 8);
@@ -1209,7 +1189,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in
 }
 
 static inline void
-slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc,
+slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
                  unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3])
 {
    short dest[64];
@@ -1228,14 +1208,14 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
       dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs);
 
    memset(dest, 0, sizeof(int16_t) * 64);
-   dest[0] = dc_dct_pred[cc] << (3 - picture->intra_dc_precision);
+   dest[0] = dc_dct_pred[cc];
    if (picture->mpeg1) {
       if (picture->picture_coding_type != D_TYPE)
-          get_mpeg1_intra_block(bs, quant_matrix, quantizer_scale, dest);
+          get_mpeg1_intra_block(bs, quantizer_scale, dest);
    } else if (picture->intra_vlc_format)
-      get_intra_block_B15(bs, quant_matrix, quantizer_scale, dest);
+      get_intra_block_B15(bs, quantizer_scale, dest);
    else
-      get_intra_block_B14(bs, quant_matrix, quantizer_scale, dest);
+      get_intra_block_B14(bs, quantizer_scale, dest);
 
    memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
 
@@ -1245,7 +1225,7 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
 }
 
 static inline void
-slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc,
+slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc,
                     unsigned x, unsigned y,  enum pipe_mpeg12_dct_type coding, int quantizer_scale)
 {
    short dest[64];
@@ -1257,9 +1237,9 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
 
    memset(dest, 0, sizeof(int16_t) * 64);
    if (picture->mpeg1)
-      get_mpeg1_non_intra_block(bs, quant_matrix, quantizer_scale, dest);
+      get_mpeg1_non_intra_block(bs, quantizer_scale, dest);
    else
-      get_non_intra_block(bs, quant_matrix, quantizer_scale, dest);
+      get_non_intra_block(bs, quantizer_scale, dest);
 
    memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64);
 
@@ -1571,8 +1551,7 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture,
 }
 
 static inline bool
-decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
-             const int intra_quantizer_matrix[64], const int non_intra_quantizer_matrix[64])
+decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
 {
    enum pipe_video_field_select default_field_select;
    struct pipe_motionvector mv_fwd, mv_bwd;
@@ -1659,12 +1638,12 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
          mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN;
 
          // unravaled loop of 6 block(i) calls in macroblock()
-         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
-         slice_intra_DCT(bs, picture, intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
+         slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred);
 
          if (picture->picture_coding_type == D_TYPE) {
             vl_vlc_needbits(&bs->vlc);
@@ -1722,17 +1701,17 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture,
 
             // TODO  optimize not fully used for idct accel only mc.
             if (coded_block_pattern & 0x20)
-               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0  luma 0
+               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0  luma 0
             if (coded_block_pattern & 0x10)
-               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
+               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1
             if (coded_block_pattern & 0x08)
-               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
+               slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2
             if (coded_block_pattern & 0x04)
-               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
+               slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3
             if (coded_block_pattern & 0x2)
-               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma
+               slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma
             if (coded_block_pattern & 0x1)
-               slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma
+               slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma
          }
 
          dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
@@ -1845,12 +1824,6 @@ void
 vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer,
                    struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3])
 {
-   int intra_quantizer_matrix[64];
-   int non_intra_quantizer_matrix[64];
-
-   const int *scan;
-   unsigned i;
-
    assert(bs);
    assert(num_ycbcr_blocks);
    assert(buffer && num_bytes);
@@ -1859,11 +1832,5 @@ vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffe
 
    vl_vlc_init(&bs->vlc, buffer, num_bytes);
 
-   scan = picture->alternate_scan ? vl_zscan_alternate : vl_zscan_normal;
-   for (i = 0; i < 64; ++i) {
-      intra_quantizer_matrix[i] = picture->intra_quantizer_matrix[scan[i]];
-      non_intra_quantizer_matrix[i] = picture->non_intra_quantizer_matrix[scan[i]];
-   }
-
-   while(decode_slice(bs, picture, intra_quantizer_matrix, non_intra_quantizer_matrix));
+   while(decode_slice(bs, picture));
 }
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index f96d7f0e2b3..ca790e7bc75 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -312,8 +312,21 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
 
       vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
    } else {
-      for (i = 0; i < VL_MAX_PLANES; ++i)
+      static const uint8_t dummy_quant[64] = {
+         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
+      };
+
+      for (i = 0; i < VL_MAX_PLANES; ++i) {
          vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
+         vl_zscan_upload_quant(&buf->zscan[i], dummy_quant, dummy_quant);
+      }
    }
 }
 
@@ -365,6 +378,7 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
                                   unsigned num_ycbcr_blocks[3])
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   uint8_t intra_quantizer_matrix[64];
    struct vl_mpeg12_decoder *dec;
    unsigned i;
 
@@ -373,8 +387,13 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
    assert(dec);
 
-   for (i = 0; i < VL_MAX_PLANES; ++i)
+   memcpy(intra_quantizer_matrix, picture->intra_quantizer_matrix, sizeof(intra_quantizer_matrix));
+   intra_quantizer_matrix[0] = 1 << (7 - picture->intra_dc_precision);
+
+   for (i = 0; i < VL_MAX_PLANES; ++i) {
       vl_zscan_set_layout(&buf->zscan[i], picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal);
+      vl_zscan_upload_quant(&buf->zscan[i], intra_quantizer_matrix, picture->non_intra_quantizer_matrix);
+   }
 
    vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks);
 }
diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
index 0f468dfb5ab..4af3962209f 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -136,11 +136,11 @@ create_vert_shader(struct vl_zscan *zscan)
    ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
 
-   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), ureg_scalar(instance, TGSI_SWIZZLE_X),
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(instance, TGSI_SWIZZLE_X),
             ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
 
    ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
-   ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp));
+   ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp));
 
    for (i = 0; i < zscan->num_channels; ++i) {
       ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
@@ -149,7 +149,8 @@ create_vert_shader(struct vl_zscan *zscan)
       ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
                ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
       ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect);
-      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp),
+      ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos);
+      ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp),
                ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total));
    }
 
@@ -165,10 +166,10 @@ create_frag_shader(struct vl_zscan *zscan)
    struct ureg_program *shader;
    struct ureg_src vtex[zscan->num_channels];
 
-   struct ureg_src src, scan, quant;
+   struct ureg_src samp_src, samp_scan, samp_quant;
 
    struct ureg_dst tmp[zscan->num_channels];
-   struct ureg_dst fragment;
+   struct ureg_dst quant, fragment;
 
    unsigned i;
 
@@ -179,12 +180,13 @@ create_frag_shader(struct vl_zscan *zscan)
    for (i = 0; i < zscan->num_channels; ++i)
       vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR);
 
-   src = ureg_DECL_sampler(shader, 0);
-   scan = ureg_DECL_sampler(shader, 1);
-   quant = ureg_DECL_sampler(shader, 2);
+   samp_src = ureg_DECL_sampler(shader, 0);
+   samp_scan = ureg_DECL_sampler(shader, 1);
+   samp_quant = ureg_DECL_sampler(shader, 2);
 
    for (i = 0; i < zscan->num_channels; ++i)
       tmp[i] = ureg_DECL_temporary(shader);
+   quant = ureg_DECL_temporary(shader);
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
@@ -194,17 +196,18 @@ create_frag_shader(struct vl_zscan *zscan)
     * fragment = tex(tmp, 0) * quant
     */
    for (i = 0; i < zscan->num_channels; ++i)
-      ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan);
+      ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan);
 
    for (i = 0; i < zscan->num_channels; ++i)
-      ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z));
+      ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W));
 
-   for (i = 0; i < zscan->num_channels; ++i)
-      ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src);
+   for (i = 0; i < zscan->num_channels; ++i) {
+      ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src);
+      ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant);
+   }
 
-   // TODO: Fetch quant and use it
-   for (i = 0; i < zscan->num_channels; ++i)
-      ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f));
+   ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f));
+   ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant));
 
    for (i = 0; i < zscan->num_channels; ++i)
       ureg_release_temporary(shader, tmp[i]);
@@ -283,7 +286,7 @@ init_state(struct vl_zscan *zscan)
       memset(&sampler, 0, sizeof(sampler));
       sampler.wrap_s = PIPE_TEX_WRAP_REPEAT;
       sampler.wrap_t = PIPE_TEX_WRAP_REPEAT;
-      sampler.wrap_r = PIPE_TEX_WRAP_REPEAT;
+      sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
       sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST;
       sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
       sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
@@ -413,15 +416,6 @@ error_resource:
    return NULL;
 }
 
-#if 0
-// TODO
-struct pipe_sampler_view *
-vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line);
-
-struct pipe_sampler_view *
-vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line);
-#endif
-
 bool
 vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
               unsigned buffer_width, unsigned buffer_height,
@@ -457,16 +451,13 @@ vl_zscan_cleanup(struct vl_zscan *zscan)
    cleanup_state(zscan);
 }
 
-#if 0
-// TODO
-void
-vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
-#endif
-
 bool
 vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
                      struct pipe_sampler_view *src, struct pipe_surface *dst)
 {
+   struct pipe_resource res_tmpl, *res;
+   struct pipe_sampler_view sv_tmpl;
+
    assert(zscan && buffer);
 
    memset(buffer, 0, sizeof(struct vl_zscan_buffer));
@@ -489,6 +480,28 @@ vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
    buffer->fb_state.nr_cbufs = 1;
    pipe_surface_reference(&buffer->fb_state.cbufs[0], dst);
 
+   memset(&res_tmpl, 0, sizeof(res_tmpl));
+   res_tmpl.target = PIPE_TEXTURE_3D;
+   res_tmpl.format = PIPE_FORMAT_R8_UNORM;
+   res_tmpl.width0 = BLOCK_WIDTH * zscan->blocks_per_line;
+   res_tmpl.height0 = BLOCK_HEIGHT;
+   res_tmpl.depth0 = 2;
+   res_tmpl.array_size = 1;
+   res_tmpl.usage = PIPE_USAGE_IMMUTABLE;
+   res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW;
+
+   res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl);
+   if (!res)
+      return false;
+
+   memset(&sv_tmpl, 0, sizeof(sv_tmpl));
+   u_sampler_view_default_template(&sv_tmpl, res, res->format);
+   sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X;
+   buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl);
+   pipe_resource_reference(&res, NULL);
+   if (!buffer->quant)
+      return false;
+
    return true;
 }
 
@@ -512,6 +525,65 @@ vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *la
    pipe_sampler_view_reference(&buffer->layout, layout);
 }
 
+void
+vl_zscan_upload_quant(struct vl_zscan_buffer *buffer,
+                      const uint8_t intra_matrix[64],
+                      const uint8_t non_intra_matrix[64])
+{
+   struct pipe_context *pipe;
+   struct pipe_transfer *buf_transfer;
+   unsigned x, y, i, pitch;
+   uint8_t *intra, *non_intra;
+
+   struct pipe_box rect =
+   {
+      0, 0, 0,
+      BLOCK_WIDTH,
+      BLOCK_HEIGHT,
+      2
+   };
+
+   assert(buffer);
+   assert(intra_matrix);
+   assert(non_intra_matrix);
+
+   pipe = buffer->zscan->pipe;
+
+   rect.width *= buffer->zscan->blocks_per_line;
+
+   buf_transfer = pipe->get_transfer
+   (
+      pipe, buffer->quant->texture,
+      0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &rect
+   );
+   if (!buf_transfer)
+      goto error_transfer;
+
+   pitch = buf_transfer->stride;
+
+   non_intra = pipe->transfer_map(pipe, buf_transfer);
+   if (!non_intra)
+      goto error_map;
+
+   intra = non_intra + BLOCK_HEIGHT * pitch;
+
+   for (i = 0; i < buffer->zscan->blocks_per_line; ++i)
+      for (y = 0; y < BLOCK_HEIGHT; ++y)
+         for (x = 0; x < BLOCK_WIDTH; ++x) {
+            intra[i * BLOCK_WIDTH + y * pitch + x] = intra_matrix[x + y * BLOCK_WIDTH];
+            non_intra[i * BLOCK_WIDTH + y * pitch + x] = non_intra_matrix[x + y * BLOCK_WIDTH];
+         }
+
+   pipe->transfer_unmap(pipe, buf_transfer);
+
+error_map:
+   pipe->transfer_destroy(pipe, buf_transfer);
+
+error_transfer:
+   return;
+}
+
 void
 vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
 {
@@ -523,10 +595,10 @@ vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances)
 
    zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state);
    zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend);
-   zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers);
+   zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 3, zscan->samplers);
    zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state);
    zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport);
-   zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src);
+   zscan->pipe->set_fragment_sampler_views(zscan->pipe, 3, &buffer->src);
    zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs);
    zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs);
    util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h
index ccc6bc4a8a1..be12b8e873a 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.h
+++ b/src/gallium/auxiliary/vl/vl_zscan.h
@@ -53,8 +53,6 @@ struct vl_zscan
    void *samplers[3];
 
    void *vs, *fs;
-
-   struct pipe_sampler_view *quant;
 };
 
 struct vl_zscan_buffer
@@ -84,11 +82,6 @@ vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe,
 void
 vl_zscan_cleanup(struct vl_zscan *zscan);
 
-#if 0
-void
-vl_zscan_upload_quant(struct vl_zscan *zscan, ...);
-#endif
-
 bool
 vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer,
                      struct pipe_sampler_view *src, struct pipe_surface *dst);
@@ -99,6 +92,11 @@ vl_zscan_cleanup_buffer(struct vl_zscan_buffer *buffer);
 void
 vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout);
 
+void
+vl_zscan_upload_quant(struct vl_zscan_buffer *buffer,
+                      const uint8_t intra_matrix[64],
+                      const uint8_t non_intra_matrix[64]);
+
 void
 vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances);
 
-- 
cgit v1.2.3


From a019b60dd37c546a29ca42209bb2f31eec3456d4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 1 Jun 2011 19:41:43 +0200
Subject: [g3dvl] dynamically adjust blocks per line

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index ca790e7bc75..30fdc758db8 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -94,7 +94,7 @@ init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
    formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
    buffer->zscan_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                                dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
-                                               dec->max_blocks / dec->blocks_per_line,
+                                               align(dec->max_blocks, dec->blocks_per_line) / dec->blocks_per_line,
                                                1, PIPE_VIDEO_CHROMA_FORMAT_444,
                                                formats, PIPE_USAGE_STATIC);
    if (!buffer->zscan_source)
@@ -680,14 +680,13 @@ find_format_config(struct vl_mpeg12_decoder *dec, const struct format_config con
 static bool
 init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 {
+   const unsigned block_size_pixels = BLOCK_WIDTH * BLOCK_HEIGHT;
    unsigned num_channels;
 
    assert(dec);
 
-   dec->blocks_per_line = 4;
-   dec->max_blocks =
-      (dec->base.width * dec->base.height) /
-      (BLOCK_WIDTH * BLOCK_HEIGHT);
+   dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
+   dec->max_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
 
    dec->zscan_source_format = format_config->zscan_source_format;
    dec->zscan_linear = vl_zscan_layout(dec->pipe, vl_zscan_linear, dec->blocks_per_line);
-- 
cgit v1.2.3


From ded2a9a628c58c2c8bfaaf6b8dd213e68de1dd20 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 1 Jun 2011 20:37:57 +0200
Subject: [g3dvl] respect maximum instruction count for idct render targets

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 30fdc758db8..35bf79261d3 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -709,15 +709,20 @@ init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_con
 static bool
 init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 {
-   unsigned nr_of_idct_render_targets;
+   unsigned nr_of_idct_render_targets, max_inst;
    enum pipe_format formats[3];
 
    struct pipe_sampler_view *matrix = NULL;
 
    nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS);
+   max_inst = dec->pipe->screen->get_shader_param(dec->pipe->screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_INSTRUCTIONS);
 
-   // more than 4 render targets usually doesn't makes any seens
-   nr_of_idct_render_targets = MIN2(nr_of_idct_render_targets, 4);
+   // Just assume we need 32 inst per render target, not 100% true, but should work in most cases
+   if (nr_of_idct_render_targets >= 4 && max_inst >= 32*4)
+      // more than 4 render targets usually doesn't makes any seens
+      nr_of_idct_render_targets = 4;
+   else
+      nr_of_idct_render_targets = 1;
 
    formats[0] = formats[1] = formats[2] = format_config->idct_source_format;
    dec->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-- 
cgit v1.2.3


From f82cfe1eb40b1de9f6d4cbdce2c509e3e429c1d4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 4 Jun 2011 12:55:43 +0200
Subject: Revert "Merge remote-tracking branch 'mareko/r300g-draw-instanced'
 into pipe-video"

This reverts commit 10370b752cd793665feec9494e6545a3f04c69f9, reversing
changes made to ded2a9a628c58c2c8bfaaf6b8dd213e68de1dd20.
---
 src/gallium/drivers/r300/r300_context.c       | 26 --------------
 src/gallium/drivers/r300/r300_context.h       |  3 --
 src/gallium/drivers/r300/r300_emit.c          | 43 +++++------------------
 src/gallium/drivers/r300/r300_render.c        | 12 -------
 src/gallium/drivers/r300/r300_state.c         | 33 +++---------------
 src/gallium/drivers/r300/r300_state_derived.c |  4 +--
 src/gallium/drivers/r300/r300_tgsi_to_rc.c    | 49 ++-------------------------
 src/gallium/drivers/r300/r300_tgsi_to_rc.h    |  3 --
 src/gallium/drivers/r300/r300_vs.c            |  2 +-
 9 files changed, 17 insertions(+), 158 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index b8053f51455..0554c40eef0 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -81,7 +81,6 @@ static void r300_release_referenced_objects(struct r300_context *r300)
     /* Manually-created vertex buffers. */
     pipe_resource_reference(&r300->dummy_vb, NULL);
     pipe_resource_reference(&r300->vbo, NULL);
-    pipe_resource_reference((struct pipe_resource**)&r300->vb_instanceid, NULL);
 
     /* If there are any queries pending or not destroyed, remove them now. */
     foreach_s(query, temp, &r300->query_list) {
@@ -494,31 +493,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
         r300->dummy_vb = screen->resource_create(screen, &vb);
     }
 
-    {
-        int i, num = 128000;
-        struct pipe_resource vb, *r;
-        struct pipe_transfer *transfer;
-        float *buf;
-
-        memset(&vb, 0, sizeof(vb));
-        vb.target = PIPE_BUFFER;
-        vb.format = PIPE_FORMAT_R8_UNORM;
-        vb.bind = PIPE_BIND_VERTEX_BUFFER;
-        vb.usage = PIPE_USAGE_IMMUTABLE;
-        vb.width0 = 4 * num;
-        vb.height0 = 1;
-        vb.depth0 = 1;
-
-        r = screen->resource_create(screen, &vb);
-
-        buf = pipe_buffer_map(&r300->context, r, PIPE_TRANSFER_WRITE, &transfer);
-        for (i = 0; i < num; i++)
-            buf[i] = i;
-        pipe_buffer_unmap(&r300->context, transfer);
-
-        r300->vb_instanceid = r300_resource(r);
-    }
-
     {
         struct pipe_depth_stencil_alpha_state dsa;
         memset(&dsa, 0, sizeof(dsa));
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 3f91666ac16..139dd210b8f 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -431,7 +431,6 @@ struct r300_vertex_element_state {
     unsigned vertex_size_dwords;
 
     struct r300_vertex_stream_state vertex_stream;
-    struct r300_vertex_stream_state vertex_stream_instanced;
 };
 
 enum r300_hiz_func {
@@ -491,8 +490,6 @@ struct r300_context {
     /* When no vertex buffer is set, this one is used instead to prevent
      * hardlocks. */
     struct pipe_resource *dummy_vb;
-    /* Vertex buffer for InstanceID. */
-    struct r300_resource *vb_instanceid;
 
     /* The currently active query. */
     struct r300_query *query_current;
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 5c85c9c5594..874037ed9fd 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -815,17 +815,15 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
     struct r300_resource *buf;
     int i;
     unsigned vertex_array_count = r300->velems->count;
-    unsigned real_vertex_array_count = vertex_array_count +
-                (vertex_array_count == 16 || instance_id == -1 ? 0 : 1);
-    unsigned packet_size = (real_vertex_array_count * 3 + 1) / 2;
+    unsigned packet_size = (vertex_array_count * 3 + 1) / 2;
     struct pipe_vertex_buffer *vb1, *vb2;
     unsigned *hw_format_size = r300->velems->format_size;
     unsigned size1, size2, offset1, offset2, stride1, stride2;
     CS_LOCALS(r300);
 
-    BEGIN_CS(2 + packet_size + real_vertex_array_count * 2);
+    BEGIN_CS(2 + packet_size + vertex_array_count * 2);
     OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
-    OUT_CS(real_vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
+    OUT_CS(vertex_array_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
 
     if (instance_id == -1) {
         /* Non-instanced arrays. This ignores instance_divisor and instance_id. */
@@ -897,28 +895,14 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset,
                 offset1 = vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride;
             }
 
-            /* Insert vertex buffer containing InstanceID. */
-            if (vertex_array_count < 16) {
-                OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1) |
-                       R300_VBPNTR_SIZE1(4));
-                OUT_CS(offset1);
-                OUT_CS(4 * instance_id);
-            } else {
-                OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));
-                OUT_CS(offset1);
-            }
-        } else if (vertex_array_count < 16) {
-            /* Insert vertex buffer containing InstanceID. */
-            OUT_CS(R300_VBPNTR_SIZE0(4));
-            OUT_CS(4 * instance_id);
+            OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(stride1));
+            OUT_CS(offset1);
         }
 
         for (i = 0; i < vertex_array_count; i++) {
             buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]);
             OUT_CS_RELOC(buf);
         }
-        if (vertex_array_count < 16)
-            OUT_CS_RELOC(r300->vb_instanceid);
     }
     END_CS;
 }
@@ -951,18 +935,11 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed)
 void r300_emit_vertex_stream_state(struct r300_context* r300,
                                    unsigned size, void* state)
 {
-    struct r300_vertex_element_state *velems =
-        (struct r300_vertex_element_state*)state;
-    struct r300_vertex_stream_state *streams;
+    struct r300_vertex_stream_state *streams =
+        (struct r300_vertex_stream_state*)state;
     unsigned i;
     CS_LOCALS(r300);
 
-    if (r300->screen->caps.has_tcl && r300->instancing_enabled) {
-        streams = &velems->vertex_stream_instanced;
-    } else {
-        streams = &velems->vertex_stream;
-    }
-
     if (DBG_ON(r300, DBG_PSC)) {
         fprintf(stderr, "r300: PSC emit:\n");
 
@@ -977,7 +954,7 @@ void r300_emit_vertex_stream_state(struct r300_context* r300,
         }
     }
 
-    BEGIN_CS((1 + streams->count) * 2);
+    BEGIN_CS(size);
     OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count);
     OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count);
     OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count);
@@ -1241,10 +1218,6 @@ validate:
             r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf,
                                     r300_resource(*buf)->domain, 0);
         }
-        if (r300->instancing_enabled) {
-            r300->rws->cs_add_reloc(r300->cs, r300->vb_instanceid->cs_buf,
-                                    r300->vb_instanceid->domain, 0);
-        }
     }
     /* ...and index buffer for HWTCL path. */
     if (index_buffer)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 3674edc975f..429b85545f7 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -305,18 +305,6 @@ static boolean r300_prepare_for_rendering(struct r300_context *r300,
                                           int index_bias,
                                           int instance_id)
 {
-    /* Update vertex elements for InstanceID here. */
-    boolean instancing_enabled = instance_id != -1;
-
-    if (r300->screen->caps.has_tcl &&
-        (flags & PREP_EMIT_AOS) &&
-        instancing_enabled != r300->instancing_enabled) {
-        r300->instancing_enabled = instancing_enabled;
-        r300_mark_atom_dirty(r300, &r300->vertex_stream_state);
-        r300->vertex_arrays_dirty = TRUE;
-        flags |= PREP_EMIT_STATES;
-    }
-
     /* Make sure there is enough space in the command stream and emit states. */
     if (r300_reserve_cs_dwords(r300, flags, cs_dwords))
         flags |= PREP_EMIT_STATES;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 5f0e73b617d..bc6c67dd034 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1604,10 +1604,9 @@ static void r300_set_index_buffer(struct pipe_context* pipe,
 }
 
 /* Initialize the PSC tables. */
-static void r300_vertex_psc(struct r300_vertex_element_state *velems,
-                            struct r300_vertex_stream_state *vstream,
-                            boolean insert_instance_id_attrib)
+static void r300_vertex_psc(struct r300_vertex_element_state *velems)
 {
+    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
     uint16_t type, swizzle;
     enum pipe_format format;
     unsigned i;
@@ -1638,27 +1637,6 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems,
         }
     }
 
-    /* Insert attrib emulating InstanceID. */
-    if (i < 15 && insert_instance_id_attrib) {
-        format = PIPE_FORMAT_R32_FLOAT;
-
-        type = r300_translate_vertex_data_type(format);
-        assert(type != R300_INVALID_FORMAT);
-
-        type |= i << R300_DST_VEC_LOC_SHIFT;
-        swizzle = r300_translate_vertex_data_swizzle(format);
-
-        if (i & 1) {
-            vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
-            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
-        } else {
-            vstream->vap_prog_stream_cntl[i >> 1] |= type;
-            vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
-        }
-
-        i++;
-    }
-
     /* Set the last vector in the PSC. */
     if (i) {
         i -= 1;
@@ -1701,8 +1679,7 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
     if (r300_screen(pipe->screen)->caps.has_tcl) {
         /* Setup PSC.
          * The unused components will be replaced by (..., 0, 1). */
-        r300_vertex_psc(velems, &velems->vertex_stream, FALSE);
-        r300_vertex_psc(velems, &velems->vertex_stream_instanced, TRUE);
+        r300_vertex_psc(velems);
 
         for (i = 0; i < count; i++) {
             velems->format_size[i] =
@@ -1733,8 +1710,8 @@ static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
         return;
     }
 
-    UPDATE_STATE(velems, r300->vertex_stream_state);
-    r300->vertex_stream_state.size = (1 + velems->vertex_stream_instanced.count) * 2;
+    UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
+    r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
     r300->vertex_arrays_dirty = TRUE;
 }
 
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index acf2021704c..04499c78cc6 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -133,9 +133,7 @@ static void r300_draw_emit_all_attribs(struct r300_context* r300)
 /* Update the PSC tables for SW TCL, using Draw. */
 static void r300_swtcl_vertex_psc(struct r300_context *r300)
 {
-    struct r300_vertex_element_state *velems =
-            (struct r300_vertex_element_state*)r300->vertex_stream_state.state;
-    struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
+    struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state;
     struct vertex_info *vinfo = &r300->vertex_info;
     uint16_t type, swizzle;
     enum pipe_format format;
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 30377ffa59b..0561ab9bfa4 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -25,7 +25,6 @@
 #include "radeon_compiler.h"
 #include "radeon_program.h"
 
-#include "util/u_math.h"
 #include "tgsi/tgsi_info.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
@@ -169,7 +168,6 @@ static unsigned translate_register_file(unsigned file)
             /* fall-through */
         case TGSI_FILE_TEMPORARY: return RC_FILE_TEMPORARY;
         case TGSI_FILE_ADDRESS: return RC_FILE_ADDRESS;
-        case TGSI_FILE_SYSTEM_VALUE: return RC_FILE_INPUT;
     }
 }
 
@@ -181,17 +179,6 @@ static int translate_register_index(
     if (file == TGSI_FILE_IMMEDIATE)
         return ttr->immediate_offset + index;
 
-    if (file == TGSI_FILE_SYSTEM_VALUE) {
-        if (index == ttr->instance_id) {
-            return ttr->num_inputs;
-        } else {
-            fprintf(stderr, "Unknown system value semantic index: %i\n",
-                    index);
-            ttr->error = TRUE;
-            return 0;
-        }
-    }
-
     return index;
 }
 
@@ -282,8 +269,7 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi
     dst->U.I.TexSwizzle = RC_SWIZZLE_XYZW;
 }
 
-static void transform_instruction(struct tgsi_to_rc * ttr,
-                                  struct tgsi_full_instruction * src)
+static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src)
 {
     struct rc_instruction * dst;
     int i;
@@ -343,27 +329,6 @@ static void handle_immediate(struct tgsi_to_rc * ttr,
     }
 }
 
-static void handle_declaration(struct tgsi_to_rc *ttr,
-                               struct tgsi_full_declaration *decl)
-{
-    switch (decl->Declaration.File) {
-    case TGSI_FILE_INPUT:
-        ttr->num_inputs = MAX2(ttr->num_inputs, decl->Range.First + 1);
-        break;
-
-    case TGSI_FILE_SYSTEM_VALUE:
-        if (decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
-            printf("Got instance id\n");
-            ttr->instance_id = decl->Range.First;
-        } else {
-            fprintf(stderr, "Unknown system value semantic: %i.\n",
-                    decl->Semantic.Name);
-            ttr->error = TRUE;
-        }
-        break;
-    }
-}
-
 void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
                      const struct tgsi_token * tokens)
 {
@@ -372,8 +337,6 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
     unsigned imm_index = 0;
     int i;
 
-    ttr->num_inputs = 0;
-    ttr->instance_id = -1;
     ttr->error = FALSE;
 
     /* Allocate constants placeholders.
@@ -400,29 +363,21 @@ void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
 
         switch (parser.FullToken.Token.Type) {
             case TGSI_TOKEN_TYPE_DECLARATION:
-                handle_declaration(ttr, &parser.FullToken.FullDeclaration);
-                if (ttr->error)
-                    goto end_while;
                 break;
-
             case TGSI_TOKEN_TYPE_IMMEDIATE:
                 handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index);
                 imm_index++;
                 break;
-
             case TGSI_TOKEN_TYPE_INSTRUCTION:
                 inst = &parser.FullToken.FullInstruction;
                 if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
-                    goto end_while;
+                    break;
                 }
 
                 transform_instruction(ttr, inst);
-                if (ttr->error)
-                    goto end_while;
                 break;
         }
     }
-end_while:
 
     tgsi_parse_free(&parser);
 
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
index c9bd6277266..adb044cfe56 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.h
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
@@ -44,9 +44,6 @@ struct tgsi_to_rc {
     struct swizzled_imms * imms_to_swizzle;
     unsigned imms_to_swizzle_count;
 
-    int num_inputs;
-    int instance_id;
-
     /* Vertex shaders have no half swizzles, and no way to handle them, so
      * until rc grows proper support, indicate if they're safe to use. */
     boolean use_half_swizzles;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 90eba5a8f45..b319890157f 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -103,7 +103,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
                               outputs->bcolor[1] != ATTR_UNUSED;
 
     /* Fill in the input mapping */
-    for (i = 0; i < info->num_inputs+1; i++)
+    for (i = 0; i < info->num_inputs; i++)
         c->code->inputs[i] = i;
 
     /* Position. */
-- 
cgit v1.2.3


From a6c76c8a90dc8995feed3c61b02dbd8408149036 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 4 Jun 2011 23:54:46 +0200
Subject: [g3dvl] use a vertex element instead of the instance id

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 21 ++++++-----
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h |  3 +-
 src/gallium/auxiliary/vl/vl_vertex_buffers.c | 56 +++++++++++++++++++++++++++-
 src/gallium/auxiliary/vl/vl_vertex_buffers.h | 16 +++++---
 src/gallium/auxiliary/vl/vl_zscan.c          | 10 ++---
 5 files changed, 85 insertions(+), 21 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 35bf79261d3..201f3abab13 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -94,7 +94,7 @@ init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
    formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
    buffer->zscan_source = vl_video_buffer_init(dec->base.context, dec->pipe,
                                                dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
-                                               align(dec->max_blocks, dec->blocks_per_line) / dec->blocks_per_line,
+                                               align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line,
                                                1, PIPE_VIDEO_CHROMA_FORMAT_444,
                                                formats, PIPE_USAGE_STATIC);
    if (!buffer->zscan_source)
@@ -563,12 +563,14 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
       }
    }
 
+   vb[2] = dec->block_num;
+
    dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
       if (!num_ycbcr_blocks[i]) continue;
 
       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
-      dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
+      dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
 
       vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]);
 
@@ -585,7 +587,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
          if (!num_ycbcr_blocks[i]) continue;
 
          vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
-         dec->pipe->set_vertex_buffers(dec->pipe, 2, vb);
+         dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
 
          if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
             vl_idct_prepare_stage2(component == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[component]);
@@ -680,14 +682,10 @@ find_format_config(struct vl_mpeg12_decoder *dec, const struct format_config con
 static bool
 init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_config)
 {
-   const unsigned block_size_pixels = BLOCK_WIDTH * BLOCK_HEIGHT;
    unsigned num_channels;
 
    assert(dec);
 
-   dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
-   dec->max_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
-
    dec->zscan_source_format = format_config->zscan_source_format;
    dec->zscan_linear = vl_zscan_layout(dec->pipe, vl_zscan_linear, dec->blocks_per_line);
    dec->zscan_normal = vl_zscan_layout(dec->pipe, vl_zscan_normal, dec->blocks_per_line);
@@ -696,11 +694,11 @@ init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_con
    num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1;
 
    if (!vl_zscan_init(&dec->zscan_y, dec->pipe, dec->base.width, dec->base.height,
-                      dec->blocks_per_line, dec->max_blocks, num_channels))
+                      dec->blocks_per_line, dec->num_blocks, num_channels))
       return false;
 
    if (!vl_zscan_init(&dec->zscan_c, dec->pipe, dec->chroma_width, dec->chroma_height,
-                      dec->blocks_per_line, dec->max_blocks, num_channels))
+                      dec->blocks_per_line, dec->num_blocks, num_channels))
       return false;
 
    return true;
@@ -838,6 +836,7 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
                          enum pipe_video_chroma_format chroma_format,
                          unsigned width, unsigned height)
 {
+   const unsigned block_size_pixels = BLOCK_WIDTH * BLOCK_HEIGHT;
    const struct format_config *format_config;
    struct vl_mpeg12_decoder *dec;
 
@@ -861,12 +860,16 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
 
    dec->pipe = pipe;
 
+   dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
+   dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
+
    dec->quads = vl_vb_upload_quads(dec->pipe);
    dec->pos = vl_vb_upload_pos(
       dec->pipe,
       dec->base.width / MACROBLOCK_WIDTH,
       dec->base.height / MACROBLOCK_HEIGHT
    );
+   dec->block_num = vl_vb_upload_block_num(dec->pipe, dec->num_blocks);
 
    dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->pipe);
    dec->ves_mv = vl_vb_get_ves_mv(dec->pipe);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 4fc19ff9524..474ae2d5d29 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -49,12 +49,13 @@ struct vl_mpeg12_decoder
    unsigned chroma_width, chroma_height;
 
    unsigned blocks_per_line;
-   unsigned max_blocks;
+   unsigned num_blocks;
 
    enum pipe_format zscan_source_format;
 
    struct pipe_vertex_buffer quads;
    struct pipe_vertex_buffer pos;
+   struct pipe_vertex_buffer block_num;
 
    void *ves_ycbcr;
    void *ves_mv;
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
index c45f96cec51..c0f1449bf80 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c
@@ -125,6 +125,49 @@ vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height)
    return pos;
 }
 
+struct pipe_vertex_buffer
+vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks)
+{
+   struct pipe_vertex_buffer buf;
+   struct pipe_transfer *buf_transfer;
+   struct vertex2s *v;
+   unsigned i;
+
+   assert(pipe);
+
+   /* create buffer */
+   buf.stride = sizeof(struct vertex2s);
+   buf.buffer_offset = 0;
+   buf.buffer = pipe_buffer_create
+   (
+      pipe->screen,
+      PIPE_BIND_VERTEX_BUFFER,
+      PIPE_USAGE_STATIC,
+      sizeof(struct vertex2s) * num_blocks
+   );
+
+   if(!buf.buffer)
+      return buf;
+
+   /* and fill it */
+   v = pipe_buffer_map
+   (
+      pipe,
+      buf.buffer,
+      PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
+      &buf_transfer
+   );
+
+   for ( i = 0; i < num_blocks; ++i, ++v) {
+      v->x = i;
+      v->y = i;
+   }
+
+   pipe_buffer_unmap(pipe, buf_transfer);
+
+   return buf;
+}
+
 static struct pipe_vertex_element
 vl_vb_get_quad_vertex_element(void)
 {
@@ -170,7 +213,12 @@ vl_vb_get_ves_ycbcr(struct pipe_context *pipe)
 
    vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1);
 
-   return pipe->create_vertex_elements_state(pipe, 2, vertex_elems);
+   /* block num element */
+   vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R16G16_SSCALED;
+
+   vl_vb_element_helper(&vertex_elems[VS_I_BLOCK_NUM], 1, 2);
+
+   return pipe->create_vertex_elements_state(pipe, 3, vertex_elems);
 }
 
 void *
@@ -249,6 +297,12 @@ error_ycbcr:
    return false;
 }
 
+unsigned
+vl_vb_attributes_per_plock(struct vl_vertex_buffer *buffer)
+{
+   return 1;
+}
+
 struct pipe_vertex_buffer
 vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component)
 {
diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
index 7e727cbca12..74845a42b69 100644
--- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h
+++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h
@@ -41,13 +41,15 @@
 /* inputs to the vertex shaders */
 enum VS_INPUT
 {
-   VS_I_RECT,
-   VS_I_VPOS,
+   VS_I_RECT = 0,
+   VS_I_VPOS = 1,
 
-   VS_I_MV_TOP,
-   VS_I_MV_BOTTOM,
+   VS_I_BLOCK_NUM = 2,
 
-   NUM_VS_INPUTS
+   VS_I_MV_TOP = 2,
+   VS_I_MV_BOTTOM = 3,
+
+   NUM_VS_INPUTS = 4
 };
 
 struct vl_vertex_buffer
@@ -71,6 +73,8 @@ struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe);
 
 struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height);
 
+struct pipe_vertex_buffer vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks);
+
 void *vl_vb_get_ves_ycbcr(struct pipe_context *pipe);
 
 void *vl_vb_get_ves_mv(struct pipe_context *pipe);
@@ -79,6 +83,8 @@ bool vl_vb_init(struct vl_vertex_buffer *buffer,
                 struct pipe_context *pipe,
                 unsigned width, unsigned height);
 
+unsigned vl_vb_attributes_per_plock(struct vl_vertex_buffer *buffer);
+
 void vl_vb_map(struct vl_vertex_buffer *buffer, struct pipe_context *pipe);
 
 struct pipe_vertex_buffer vl_vb_get_ycbcr(struct vl_vertex_buffer *buffer, int component);
diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
index 4af3962209f..2b52d610f8f 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -92,8 +92,8 @@ create_vert_shader(struct vl_zscan *zscan)
 {
    struct ureg_program *shader;
 
-   struct ureg_src scale, instance;
-   struct ureg_src vrect, vpos;
+   struct ureg_src scale;
+   struct ureg_src vrect, vpos, block_num;
 
    struct ureg_dst tmp;
    struct ureg_dst o_vpos, o_vtex[zscan->num_channels];
@@ -108,11 +108,11 @@ create_vert_shader(struct vl_zscan *zscan)
       (float)BLOCK_WIDTH / zscan->buffer_width,
       (float)BLOCK_HEIGHT / zscan->buffer_height);
 
-   instance = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0);
-
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
 
+   block_num = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0);
+
    tmp = ureg_DECL_temporary(shader);
 
    o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
@@ -136,7 +136,7 @@ create_vert_shader(struct vl_zscan *zscan)
    ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale);
    ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
 
-   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(instance, TGSI_SWIZZLE_X),
+   ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(block_num, TGSI_SWIZZLE_X),
             ureg_imm1f(shader, 1.0f / zscan->blocks_per_line));
 
    ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
-- 
cgit v1.2.3


From 129a3c1cf0f9174d32db022b970ffb2127450ff9 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 5 Jun 2011 00:59:58 +0200
Subject: r600g: some merge fixes

---
 src/gallium/drivers/r600/eg_state_inlines.h   | 1 -
 src/gallium/drivers/r600/r600_state_inlines.h | 5 +++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_state_inlines.h b/src/gallium/drivers/r600/eg_state_inlines.h
index a2ba73febdd..b780dba3e33 100644
--- a/src/gallium/drivers/r600/eg_state_inlines.h
+++ b/src/gallium/drivers/r600/eg_state_inlines.h
@@ -377,7 +377,6 @@ static inline uint32_t r600_translate_colorswap(enum pipe_format format)
 	case PIPE_FORMAT_R32G32_FLOAT:
 	case PIPE_FORMAT_R16G16B16A16_UNORM:
 	case PIPE_FORMAT_R16G16B16A16_SNORM:
-	case PIPE_FORMAT_R16G16B16A16_SSCALED:
 	case PIPE_FORMAT_R16G16B16A16_FLOAT:
 
 		/* 128-bit buffers. */
diff --git a/src/gallium/drivers/r600/r600_state_inlines.h b/src/gallium/drivers/r600/r600_state_inlines.h
index 3254012ddf5..acd41a21214 100644
--- a/src/gallium/drivers/r600/r600_state_inlines.h
+++ b/src/gallium/drivers/r600/r600_state_inlines.h
@@ -455,8 +455,8 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
 	case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
 		return V_0280A0_COLOR_24_8;
 
-	//case PIPE_FORMAT_R32_FLOAT:
-	//	return V_0280A0_COLOR_32_FLOAT;
+	case PIPE_FORMAT_R32_FLOAT:
+		return V_0280A0_COLOR_32_FLOAT;
 
 	case PIPE_FORMAT_R16G16_FLOAT:
 		return V_0280A0_COLOR_16_16_FLOAT;
@@ -472,6 +472,7 @@ static INLINE uint32_t r600_translate_colorformat(enum pipe_format format)
 	case PIPE_FORMAT_R16G16B16_USCALED:
 	case PIPE_FORMAT_R16G16B16A16_USCALED:
 	case PIPE_FORMAT_R16G16B16_SSCALED:
+	case PIPE_FORMAT_R16G16B16A16_SSCALED:
 	case PIPE_FORMAT_R16G16B16A16_UNORM:
 	case PIPE_FORMAT_R16G16B16A16_SNORM:
 		return V_0280A0_COLOR_16_16_16_16;
-- 
cgit v1.2.3


From bdfe77444d85cbd0f269ad35388e27c03e654574 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 5 Jun 2011 01:28:44 +0200
Subject: st/mesa: some more merge fixes

---
 src/mesa/state_tracker/st_format.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 72d5a25336e..35835712547 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -514,8 +514,8 @@ st_pipe_format_to_mesa_format(enum pipe_format format)
       return MESA_FORMAT_SIGNED_L_LATC1;
    case PIPE_FORMAT_LATC2_UNORM:
       return MESA_FORMAT_LA_LATC2;
-   //case PIPE_FORMAT_LATC2_SNORM:
-   //   return MESA_FORMAT_SIGNED_LA_LATC2;
+   case PIPE_FORMAT_LATC2_SNORM:
+      return MESA_FORMAT_SIGNED_LA_LATC2;
 
    /* signed normalized formats */
    case PIPE_FORMAT_R8_SNORM:
-- 
cgit v1.2.3


From c6c2ef070957e5a0c0003e1d0a6d6e2fe197fb40 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 5 Jun 2011 16:39:10 +0200
Subject: [g3dvl] use a vertex element instead of the instance id

Enable this change permanently
---
 src/gallium/auxiliary/vl/vl_zscan.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c
index 2b52d610f8f..58cee0070d8 100644
--- a/src/gallium/auxiliary/vl/vl_zscan.c
+++ b/src/gallium/auxiliary/vl/vl_zscan.c
@@ -110,8 +110,7 @@ create_vert_shader(struct vl_zscan *zscan)
 
    vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
    vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
-
-   block_num = ureg_DECL_system_value(shader, 0, TGSI_SEMANTIC_INSTANCEID, 0);
+   block_num = ureg_DECL_vs_input(shader, VS_I_BLOCK_NUM);
 
    tmp = ureg_DECL_temporary(shader);
 
-- 
cgit v1.2.3


From 7e1fbb360332ecac2789e28a0f3d303306f687b1 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 5 Jun 2011 17:07:17 +0200
Subject: [g3dvl] remove unused and duplicate fields from picture structure

---
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c |  6 +++---
 src/gallium/include/pipe/p_video_state.h       | 10 ----------
 src/gallium/state_trackers/vdpau/decode.c      |  1 +
 3 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index e7fbc31ce24..7a14efb627e 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -1209,7 +1209,7 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur
 
    memset(dest, 0, sizeof(int16_t) * 64);
    dest[0] = dc_dct_pred[cc];
-   if (picture->mpeg1) {
+   if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) {
       if (picture->picture_coding_type != D_TYPE)
           get_mpeg1_intra_block(bs, quantizer_scale, dest);
    } else if (picture->intra_vlc_format)
@@ -1236,7 +1236,7 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi
    bs->ycbcr_stream[cc]->coding = coding;
 
    memset(dest, 0, sizeof(int16_t) * 64);
-   if (picture->mpeg1)
+   if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1)
       get_mpeg1_non_intra_block(bs, quantizer_scale, dest);
    else
       get_non_intra_block(bs, quantizer_scale, dest);
@@ -1654,7 +1654,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture)
          if (picture->picture_structure == FRAME_PICTURE)
             switch (macroblock_modes & MOTION_TYPE_MASK) {
             case MC_FRAME:
-               if (picture->mpeg1) {
+               if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) {
                   MOTION_CALL(motion_mp1, macroblock_modes);
                } else {
                   MOTION_CALL(motion_fr_frame, macroblock_modes);
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index f46c2dd2bbf..6f64c38c6bc 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -118,18 +118,8 @@ struct pipe_mpeg12_picture_desc
    unsigned concealment_motion_vectors;
    unsigned f_code[2][2];
 
-   bool mpeg1;
    uint8_t *intra_quantizer_matrix;
    uint8_t *non_intra_quantizer_matrix;
-
-#if 0
-   /* TODO: Use bitfields where possible? */
-   unsigned top_field_first;
-   unsigned full_pel_forward_vector;
-   unsigned full_pel_backward_vector;
-   struct pipe_buffer *chroma_intra_quantizer_matrix;
-   struct pipe_buffer *chroma_non_intra_quantizer_matrix;
-#endif
 };
 
 #ifdef __cplusplus
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 0f658a92a11..de6f96babcb 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -183,6 +183,7 @@ vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder,
    }
 
    memset(&picture, 0, sizeof(picture));
+   picture.base.profile = decoder->profile;
    picture.picture_coding_type = picture_info->picture_coding_type;
    picture.picture_structure = picture_info->picture_structure;
    picture.frame_pred_frame_dct = picture_info->frame_pred_frame_dct;
-- 
cgit v1.2.3


From c4a168819dee9a2f9b4e7c7ab8d79bb50876d85d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 5 Jun 2011 17:53:48 +0200
Subject: [g3dvl] rename map/unmap to begin/end frame

mapping and unmapping of buffers is just an implementation detail.
beginning and ending a frame is much more descriptive
---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   | 8 ++++----
 src/gallium/include/pipe/p_video_context.h     | 4 ++--
 src/gallium/state_trackers/vdpau/decode.c      | 4 ++--
 src/gallium/state_trackers/xorg/xvmc/surface.c | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 201f3abab13..bdca9dbe316 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -261,7 +261,7 @@ vl_mpeg12_buffer_destroy(struct pipe_video_decode_buffer *buffer)
 }
 
 static void
-vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer)
+vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
    struct vl_mpeg12_decoder *dec;
@@ -399,7 +399,7 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
 }
 
 static void
-vl_mpeg12_buffer_unmap(struct pipe_video_decode_buffer *buffer)
+vl_mpeg12_buffer_end_frame(struct pipe_video_decode_buffer *buffer)
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
    struct vl_mpeg12_decoder *dec;
@@ -472,13 +472,13 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
 
    buffer->base.decoder = decoder;
    buffer->base.destroy = vl_mpeg12_buffer_destroy;
-   buffer->base.map = vl_mpeg12_buffer_map;
+   buffer->base.begin_frame = vl_mpeg12_buffer_begin_frame;
    buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream;
    buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer;
    buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
    buffer->base.get_mv_stream = vl_mpeg12_buffer_get_mv_stream;
    buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream;
-   buffer->base.unmap = vl_mpeg12_buffer_unmap;
+   buffer->base.end_frame = vl_mpeg12_buffer_end_frame;
 
    if (!vl_vb_init(&buffer->vertex_stream, dec->pipe,
                    dec->base.width / MACROBLOCK_WIDTH,
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 512b5b22d77..374fa511054 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -177,7 +177,7 @@ struct pipe_video_decode_buffer
    /**
     * map the input buffer into memory before starting decoding
     */
-   void (*map)(struct pipe_video_decode_buffer *decbuf);
+   void (*begin_frame)(struct pipe_video_decode_buffer *decbuf);
 
    /**
     * get the pointer where to put the ycbcr blocks of a component
@@ -210,7 +210,7 @@ struct pipe_video_decode_buffer
    /**
     * unmap decoder buffer before flushing
     */
-   void (*unmap)(struct pipe_video_decode_buffer *decbuf);
+   void (*end_frame)(struct pipe_video_decode_buffer *decbuf);
 };
 
 /**
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index de6f96babcb..5c6d0467924 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -200,13 +200,13 @@ vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder,
    picture.intra_quantizer_matrix = picture_info->intra_quantizer_matrix;
    picture.non_intra_quantizer_matrix = picture_info->non_intra_quantizer_matrix;
 
-   buffer->map(buffer);
+   buffer->begin_frame(buffer);
 
    for (i = 0; i < bitstream_buffer_count; ++i)
       buffer->decode_bitstream(buffer, bitstream_buffers[i].bitstream_bytes,
                                bitstream_buffers[i].bitstream, &picture, num_ycbcr_blocks);
 
-   buffer->unmap(buffer);
+   buffer->end_frame(buffer);
 
    decoder->flush_buffer(buffer, num_ycbcr_blocks, ref_frames, target);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index da41a182164..4d6c58ebc76 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -277,7 +277,7 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
    }
 
    if (surface->mapped) {
-      surface->decode_buffer->unmap(surface->decode_buffer);
+      surface->decode_buffer->end_frame(surface->decode_buffer);
       for (i = 0; i < 3; ++i)
          num_ycbcr_blocks[i] = surface->ycbcr[i].num_blocks_added;
       context_priv->decoder->flush_buffer(surface->decode_buffer,
@@ -415,7 +415,7 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    }
 
    if (!target_surface_priv->mapped) {
-      t_buffer->map(t_buffer);
+      t_buffer->begin_frame(t_buffer);
 
       for (i = 0; i < 3; ++i) {
          target_surface_priv->ycbcr[i].num_blocks_added = 0;
-- 
cgit v1.2.3


From b4fa7db65639322ae8dea19a23c9cc8234a3d7e1 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 5 Jun 2011 18:59:57 +0200
Subject: [g3dvl] split quant matrix out of picture info

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 21 ++++++++++++++-------
 src/gallium/include/pipe/p_video_context.h   |  7 +++++++
 src/gallium/include/pipe/p_video_state.h     |  4 ----
 src/gallium/state_trackers/vdpau/decode.c    |  9 +++++----
 4 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index bdca9dbe316..238785f400d 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -330,6 +330,18 @@ vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer)
    }
 }
 
+static void
+vl_mpeg12_buffer_set_quant_matrix(struct pipe_video_decode_buffer *buffer,
+                                  uint8_t intra_matrix[64],
+                                  uint8_t non_intra_matrix[64])
+{
+   struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
+   unsigned i;
+
+   for (i = 0; i < VL_MAX_PLANES; ++i)
+      vl_zscan_upload_quant(&buf->zscan[i], intra_matrix, non_intra_matrix);
+}
+
 static struct pipe_ycbcr_block *
 vl_mpeg12_buffer_get_ycbcr_stream(struct pipe_video_decode_buffer *buffer, int component)
 {
@@ -378,7 +390,6 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
                                   unsigned num_ycbcr_blocks[3])
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
-   uint8_t intra_quantizer_matrix[64];
    struct vl_mpeg12_decoder *dec;
    unsigned i;
 
@@ -387,13 +398,8 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer,
    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
    assert(dec);
 
-   memcpy(intra_quantizer_matrix, picture->intra_quantizer_matrix, sizeof(intra_quantizer_matrix));
-   intra_quantizer_matrix[0] = 1 << (7 - picture->intra_dc_precision);
-
-   for (i = 0; i < VL_MAX_PLANES; ++i) {
+   for (i = 0; i < VL_MAX_PLANES; ++i)
       vl_zscan_set_layout(&buf->zscan[i], picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal);
-      vl_zscan_upload_quant(&buf->zscan[i], intra_quantizer_matrix, picture->non_intra_quantizer_matrix);
-   }
 
    vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks);
 }
@@ -473,6 +479,7 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    buffer->base.decoder = decoder;
    buffer->base.destroy = vl_mpeg12_buffer_destroy;
    buffer->base.begin_frame = vl_mpeg12_buffer_begin_frame;
+   buffer->base.set_quant_matrix = vl_mpeg12_buffer_set_quant_matrix;
    buffer->base.get_ycbcr_stream = vl_mpeg12_buffer_get_ycbcr_stream;
    buffer->base.get_ycbcr_buffer = vl_mpeg12_buffer_get_ycbcr_buffer;
    buffer->base.get_mv_stream_stride = vl_mpeg12_buffer_get_mv_stream_stride;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 374fa511054..bea5067331e 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -179,6 +179,13 @@ struct pipe_video_decode_buffer
     */
    void (*begin_frame)(struct pipe_video_decode_buffer *decbuf);
 
+   /**
+    * set the quantification matrixes
+    */
+   void (*set_quant_matrix)(struct pipe_video_decode_buffer *decbuf,
+                            uint8_t intra_matrix[64],
+                            uint8_t non_intra_matrix[64]);
+
    /**
     * get the pointer where to put the ycbcr blocks of a component
     */
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 6f64c38c6bc..4d8a24116a0 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -113,13 +113,9 @@ struct pipe_mpeg12_picture_desc
    unsigned frame_pred_frame_dct;
    unsigned q_scale_type;
    unsigned alternate_scan;
-   unsigned intra_dc_precision;
    unsigned intra_vlc_format;
    unsigned concealment_motion_vectors;
    unsigned f_code[2][2];
-
-   uint8_t *intra_quantizer_matrix;
-   uint8_t *non_intra_quantizer_matrix;
 };
 
 #ifdef __cplusplus
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 5c6d0467924..8458864cfc1 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -160,6 +160,7 @@ vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder,
 {
    struct pipe_mpeg12_picture_desc picture;
    struct pipe_video_buffer *ref_frames[2];
+   uint8_t intra_quantizer_matrix[64];
    unsigned num_ycbcr_blocks[3] = { 0, 0, 0 };
    unsigned i;
 
@@ -189,7 +190,6 @@ vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder,
    picture.frame_pred_frame_dct = picture_info->frame_pred_frame_dct;
    picture.q_scale_type = picture_info->q_scale_type;
    picture.alternate_scan = picture_info->alternate_scan;
-   picture.intra_dc_precision = picture_info->intra_dc_precision;
    picture.intra_vlc_format = picture_info->intra_vlc_format;
    picture.concealment_motion_vectors = picture_info->concealment_motion_vectors;
    picture.f_code[0][0] = picture_info->f_code[0][0] - 1;
@@ -197,11 +197,12 @@ vlVdpDecoderRenderMpeg2(struct pipe_video_decoder *decoder,
    picture.f_code[1][0] = picture_info->f_code[1][0] - 1;
    picture.f_code[1][1] = picture_info->f_code[1][1] - 1;
 
-   picture.intra_quantizer_matrix = picture_info->intra_quantizer_matrix;
-   picture.non_intra_quantizer_matrix = picture_info->non_intra_quantizer_matrix;
-
    buffer->begin_frame(buffer);
 
+   memcpy(intra_quantizer_matrix, picture_info->intra_quantizer_matrix, sizeof(intra_quantizer_matrix));
+   intra_quantizer_matrix[0] = 1 << (7 - picture_info->intra_dc_precision);
+   buffer->set_quant_matrix(buffer, intra_quantizer_matrix, picture_info->non_intra_quantizer_matrix);
+
    for (i = 0; i < bitstream_buffer_count; ++i)
       buffer->decode_bitstream(buffer, bitstream_buffers[i].bitstream_bytes,
                                bitstream_buffers[i].bitstream, &picture, num_ycbcr_blocks);
-- 
cgit v1.2.3


From f1bf7d3dbfa8c74b9537e6132f17dd8250a3451b Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 7 Jun 2011 21:13:59 +0200
Subject: [g3dvl] move dummy quantization into xvmc state tracker

---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   | 18 +++---------------
 src/gallium/include/pipe/p_video_context.h     |  4 ++--
 src/gallium/state_trackers/xorg/xvmc/surface.c | 13 +++++++++++++
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 238785f400d..eacb49e83c0 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -312,28 +312,16 @@ vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer)
 
       vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream);
    } else {
-      static const uint8_t dummy_quant[64] = {
-         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
-         0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
-      };
 
-      for (i = 0; i < VL_MAX_PLANES; ++i) {
+      for (i = 0; i < VL_MAX_PLANES; ++i)
          vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear);
-         vl_zscan_upload_quant(&buf->zscan[i], dummy_quant, dummy_quant);
-      }
    }
 }
 
 static void
 vl_mpeg12_buffer_set_quant_matrix(struct pipe_video_decode_buffer *buffer,
-                                  uint8_t intra_matrix[64],
-                                  uint8_t non_intra_matrix[64])
+                                  const uint8_t intra_matrix[64],
+                                  const uint8_t non_intra_matrix[64])
 {
    struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer;
    unsigned i;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index bea5067331e..2a30099c5b0 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -183,8 +183,8 @@ struct pipe_video_decode_buffer
     * set the quantification matrixes
     */
    void (*set_quant_matrix)(struct pipe_video_decode_buffer *decbuf,
-                            uint8_t intra_matrix[64],
-                            uint8_t non_intra_matrix[64]);
+                            const uint8_t intra_matrix[64],
+                            const uint8_t non_intra_matrix[64]);
 
    /**
     * get the pointer where to put the ycbcr blocks of a component
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 4d6c58ebc76..3db17d1ac51 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -291,6 +291,17 @@ unmap_and_flush_surface(XvMCSurfacePrivate *surface)
 PUBLIC
 Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surface)
 {
+   static const uint8_t dummy_quant[64] = {
+      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+      0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10
+   };
+
    XvMCContextPrivate *context_priv;
    struct pipe_video_context *vpipe;
    XvMCSurfacePrivate *surface_priv;
@@ -312,6 +323,8 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
       return BadAlloc;
 
    surface_priv->decode_buffer = context_priv->decoder->create_buffer(context_priv->decoder);
+   surface_priv->decode_buffer->set_quant_matrix(surface_priv->decode_buffer, dummy_quant, dummy_quant);
+
    surface_priv->mv_stride = surface_priv->decode_buffer->get_mv_stream_stride(surface_priv->decode_buffer);
    surface_priv->video_buffer = vpipe->create_buffer(vpipe, PIPE_FORMAT_NV12,
                                                      context_priv->decoder->chroma_format,
-- 
cgit v1.2.3


From 8b02f9e67b83e40019d6b07b9a035ba5d5042688 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 7 Jun 2011 21:15:58 +0200
Subject: xvmc: fix some warning about uninitialized vars

---
 src/gallium/state_trackers/xorg/xvmc/context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index a28c3f7a424..fbfa1afe44c 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -53,7 +53,7 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id,
    XvAdaptorInfo *adaptor_info;
    unsigned int num_adaptors;
    int num_types;
-   unsigned int max_width, max_height;
+   unsigned int max_width = 0, max_height = 0;
    Status ret;
 
    assert(dpy);
-- 
cgit v1.2.3


From 00b4e48560f4d576b7b1924257322f5167e58c8d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 7 Jun 2011 22:01:30 +0200
Subject: [g3dvl] rework video buffer format handling

---
 src/gallium/auxiliary/vl/vl_context.c      | 50 ++++++++++++------------------
 src/gallium/auxiliary/vl/vl_video_buffer.c | 27 ++++++++++++++++
 src/gallium/auxiliary/vl/vl_video_buffer.h |  6 ++++
 src/gallium/include/pipe/p_video_context.h |  5 ++-
 4 files changed, 55 insertions(+), 33 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index e4805eae98c..e3aa49d2856 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -35,18 +35,6 @@
 #include "vl_compositor.h"
 #include "vl_mpeg12_decoder.h"
 
-const enum pipe_format const_resource_formats_YV12[3] = {
-   PIPE_FORMAT_R8_UNORM,
-   PIPE_FORMAT_R8_UNORM,
-   PIPE_FORMAT_R8_UNORM
-};
-
-const enum pipe_format const_resource_formats_NV12[3] = {
-   PIPE_FORMAT_R8_UNORM,
-   PIPE_FORMAT_R8G8_UNORM,
-   PIPE_FORMAT_NONE
-};
-
 static void
 vl_context_destroy(struct pipe_video_context *context)
 {
@@ -76,15 +64,28 @@ vl_context_get_param(struct pipe_video_context *context, int param)
 static boolean
 vl_context_is_format_supported(struct pipe_video_context *context,
                                enum pipe_format format,
-                               unsigned usage)
+                               enum pipe_video_profile profile)
 {
    struct vl_context *ctx = (struct vl_context*)context;
+   const enum pipe_format *resource_formats;
+   unsigned i;
 
    assert(context);
 
-   return ctx->pipe->screen->is_format_supported(ctx->pipe->screen, format,
-                                                 PIPE_TEXTURE_2D,
-                                                 0, usage);
+   resource_formats = vl_video_buffer_formats(ctx->pipe, format);
+   if (!resource_formats)
+      return false;
+
+   for(i = 0; i < VL_MAX_PLANES; ++i) {
+      if (!resource_formats[i])
+         continue;
+
+      if (!ctx->pipe->screen->is_format_supported(ctx->pipe->screen, resource_formats[i],
+                                                  PIPE_TEXTURE_2D, 0, PIPE_USAGE_STATIC))
+         return false;
+   }
+
+   return true;
 }
 
 static struct pipe_surface *
@@ -215,27 +216,16 @@ vl_context_create_buffer(struct pipe_video_context *context,
                          unsigned width, unsigned height)
 {
    struct vl_context *ctx = (struct vl_context*)context;
+   const enum pipe_format *resource_formats;
    struct pipe_video_buffer *result;
    unsigned buffer_width, buffer_height;
 
-   const enum pipe_format *resource_formats;
-
    assert(context);
    assert(width > 0 && height > 0);
 
-   switch(buffer_format) {
-   case PIPE_FORMAT_YV12:
-      resource_formats = const_resource_formats_YV12;
-      break;
-
-   case PIPE_FORMAT_NV12:
-      resource_formats = const_resource_formats_NV12;
-      break;
-
-   default:
-      assert(0);
+   resource_formats = vl_video_buffer_formats(ctx->pipe, buffer_format);
+   if (!resource_formats)
       return NULL;
-   }
 
    buffer_width = ctx->pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
    buffer_height = ctx->pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index 976d22830de..93bc096b733 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -38,6 +38,33 @@
 
 #include "vl_video_buffer.h"
 
+const enum pipe_format const_resource_formats_YV12[3] = {
+   PIPE_FORMAT_R8_UNORM,
+   PIPE_FORMAT_R8_UNORM,
+   PIPE_FORMAT_R8_UNORM
+};
+
+const enum pipe_format const_resource_formats_NV12[3] = {
+   PIPE_FORMAT_R8_UNORM,
+   PIPE_FORMAT_R8G8_UNORM,
+   PIPE_FORMAT_NONE
+};
+
+const enum pipe_format *
+vl_video_buffer_formats(struct pipe_context *pipe, enum pipe_format format)
+{
+   switch(format) {
+   case PIPE_FORMAT_YV12:
+      return const_resource_formats_YV12;
+
+   case PIPE_FORMAT_NV12:
+      return const_resource_formats_NV12;
+
+   default:
+      return NULL;
+   }
+}
+
 static void
 vl_video_buffer_destroy(struct pipe_video_buffer *buffer)
 {
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
index 2dca74f641e..728c6f5f091 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.h
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -49,6 +49,12 @@ struct vl_video_buffer
    struct pipe_surface      *surfaces[VL_MAX_PLANES];
 };
 
+/**
+ * get subformats for each plane
+ */
+const enum pipe_format *
+vl_video_buffer_formats(struct pipe_context *pipe, enum pipe_format format);
+
 /**
  * initialize a buffer, creating its resources
  */
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 2a30099c5b0..c9e618b6080 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -62,12 +62,11 @@ struct pipe_video_context
    int (*get_param)(struct pipe_video_context *context, int param);
 
    /**
-    * Check if the given pipe_format is supported as a texture or
-    * drawing surface.
+    * Check if the given pipe_format is supported as a video buffer
     */
    boolean (*is_format_supported)(struct pipe_video_context *context,
                                   enum pipe_format format,
-                                  unsigned usage);
+                                  enum pipe_video_profile profile);
 
    /**
     * create a surface of a texture
-- 
cgit v1.2.3


From f265a194263bb2a3fa204947a9c98f472835f121 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Thu, 7 Jul 2011 22:51:45 +0200
Subject: [g3dvl] rename get_param to get_video_param and move into screen
 object

---
 src/gallium/auxiliary/vl/vl_context.c             | 37 ++++++++++-------------
 src/gallium/auxiliary/vl/vl_context.h             |  3 +-
 src/gallium/drivers/r300/r300_screen.c            | 13 ++++++++
 src/gallium/drivers/r300/r300_video_context.c     |  2 +-
 src/gallium/drivers/r600/r600_pipe.c              | 13 ++++++++
 src/gallium/drivers/r600/r600_video_context.c     |  2 +-
 src/gallium/drivers/softpipe/sp_screen.c          | 15 ++++++++-
 src/gallium/include/pipe/p_defines.h              |  5 +++
 src/gallium/include/pipe/p_screen.h               |  6 ++++
 src/gallium/include/pipe/p_video_context.h        |  6 ----
 src/gallium/state_trackers/xorg/xvmc/subpicture.c |  4 ++-
 11 files changed, 73 insertions(+), 33 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index e3aa49d2856..3a90a247bd8 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -47,20 +47,6 @@ vl_context_destroy(struct pipe_video_context *context)
    FREE(ctx);
 }
 
-static int
-vl_context_get_param(struct pipe_video_context *context, int param)
-{
-   struct vl_context *ctx = (struct vl_context*)context;
-
-   assert(context);
-
-   if (param == PIPE_CAP_NPOT_TEXTURES)
-      return !ctx->pot_buffers;
-
-   debug_printf("vl_context: Unknown PIPE_CAP %d\n", param);
-   return 0;
-}
-
 static boolean
 vl_context_is_format_supported(struct pipe_video_context *context,
                                enum pipe_format format,
@@ -192,12 +178,15 @@ vl_context_create_decoder(struct pipe_video_context *context,
 {
    struct vl_context *ctx = (struct vl_context*)context;
    unsigned buffer_width, buffer_height;
+   bool pot_buffers;
 
    assert(context);
    assert(width > 0 && height > 0);
+   
+   pot_buffers = !ctx->base.screen->get_video_param(ctx->base.screen, profile, PIPE_VIDEO_CAP_NPOT_TEXTURES);
 
-   buffer_width = ctx->pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
-   buffer_height = ctx->pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
+   buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
+   buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
 
    switch (u_reduce_video_profile(profile)) {
       case PIPE_VIDEO_CODEC_MPEG12:
@@ -219,16 +208,24 @@ vl_context_create_buffer(struct pipe_video_context *context,
    const enum pipe_format *resource_formats;
    struct pipe_video_buffer *result;
    unsigned buffer_width, buffer_height;
+   bool pot_buffers;
 
    assert(context);
    assert(width > 0 && height > 0);
 
+   pot_buffers = !ctx->base.screen->get_video_param
+   (
+      ctx->base.screen,
+      PIPE_VIDEO_PROFILE_UNKNOWN,
+      PIPE_VIDEO_CAP_NPOT_TEXTURES
+   );
+
    resource_formats = vl_video_buffer_formats(ctx->pipe, buffer_format);
    if (!resource_formats)
       return NULL;
 
-   buffer_width = ctx->pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
-   buffer_height = ctx->pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
+   buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
+   buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
 
    result = vl_video_buffer_init(context, ctx->pipe,
                                  buffer_width, buffer_height, 1,
@@ -252,7 +249,7 @@ vl_context_create_compositor(struct pipe_video_context *context)
 }
 
 struct pipe_video_context *
-vl_create_context(struct pipe_context *pipe, bool pot_buffers)
+vl_create_context(struct pipe_context *pipe)
 {
    struct vl_context *ctx;
 
@@ -264,7 +261,6 @@ vl_create_context(struct pipe_context *pipe, bool pot_buffers)
    ctx->base.screen = pipe->screen;
 
    ctx->base.destroy = vl_context_destroy;
-   ctx->base.get_param = vl_context_get_param;
    ctx->base.is_format_supported = vl_context_is_format_supported;
    ctx->base.create_surface = vl_context_create_surface;
    ctx->base.create_sampler_view = vl_context_create_sampler_view;
@@ -275,7 +271,6 @@ vl_create_context(struct pipe_context *pipe, bool pot_buffers)
    ctx->base.create_compositor = vl_context_create_compositor;
 
    ctx->pipe = pipe;
-   ctx->pot_buffers = pot_buffers;
 
    return &ctx->base;
 }
diff --git a/src/gallium/auxiliary/vl/vl_context.h b/src/gallium/auxiliary/vl/vl_context.h
index 9f12a0df925..4fbe2651d89 100644
--- a/src/gallium/auxiliary/vl/vl_context.h
+++ b/src/gallium/auxiliary/vl/vl_context.h
@@ -38,12 +38,11 @@ struct vl_context
 {
    struct pipe_video_context base;
    struct pipe_context *pipe;
-   bool pot_buffers;
 };
 
 /* drivers can call this function in their pipe_video_context constructors and pass it
    an accelerated pipe_context along with suitable buffering modes, etc */
 struct pipe_video_context *
-vl_create_context(struct pipe_context *pipe, bool pot_buffers);
+vl_create_context(struct pipe_context *pipe);
 
 #endif /* vl_context_h */
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 05af2148b38..a440ecb8c39 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -303,6 +303,18 @@ static float r300_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
     }
 }
 
+static int r300_get_video_param(struct pipe_screen *screen,
+				enum pipe_video_profile profile,
+				enum pipe_video_cap param)
+{	/* pipe_screen::get_video_param hook; screen and profile are not consulted */
+	switch (param) {
+	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+		return 0;	/* vl_context then rounds video buffer sizes up to powers of two */
+	default:
+		return 0;	/* any other cap is reported as 0 */
+	}
+}
+
 static boolean r300_is_format_supported(struct pipe_screen* screen,
                                         enum pipe_format format,
                                         enum pipe_texture_target target,
@@ -508,6 +520,7 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws)
     r300screen->screen.get_param = r300_get_param;
     r300screen->screen.get_shader_param = r300_get_shader_param;
     r300screen->screen.get_paramf = r300_get_paramf;
+    r300screen->screen.get_video_param = r300_get_video_param;
     r300screen->screen.is_format_supported = r300_is_format_supported;
     r300screen->screen.context_create = r300_create_context;
     r300screen->screen.video_context_create = r300_video_create;
diff --git a/src/gallium/drivers/r300/r300_video_context.c b/src/gallium/drivers/r300/r300_video_context.c
index 1d5bfefb9dd..697e45a5745 100644
--- a/src/gallium/drivers/r300/r300_video_context.c
+++ b/src/gallium/drivers/r300/r300_video_context.c
@@ -34,5 +34,5 @@ r300_video_create(struct pipe_screen *screen, void *priv)
    if (!pipe)
       return NULL;
 
-   return vl_create_context(pipe, false);
+   return vl_create_context(pipe);
 }
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 049a4daae66..a25b6d0ff96 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -507,6 +507,18 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
 	}
 }
 
+static int r600_get_video_param(struct pipe_screen *screen,
+				enum pipe_video_profile profile,
+				enum pipe_video_cap param)
+{	/* pipe_screen::get_video_param hook; screen and profile are not consulted */
+	switch (param) {
+	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+		return 1;	/* vl_context then only aligns video buffer sizes to the macroblock size */
+	default:
+		return 0;	/* any other cap is reported as 0 */
+	}
+}
+
 static boolean r600_is_format_supported(struct pipe_screen* screen,
 					enum pipe_format format,
 					enum pipe_texture_target target,
@@ -653,6 +665,7 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon)
 	rscreen->screen.get_param = r600_get_param;
 	rscreen->screen.get_shader_param = r600_get_shader_param;
 	rscreen->screen.get_paramf = r600_get_paramf;
+	rscreen->screen.get_video_param = r600_get_video_param;
 	rscreen->screen.is_format_supported = r600_is_format_supported;
 	rscreen->screen.context_create = r600_create_context;
 	rscreen->screen.video_context_create = r600_video_create;
diff --git a/src/gallium/drivers/r600/r600_video_context.c b/src/gallium/drivers/r600/r600_video_context.c
index a0ab3475fde..5f0d5f5baec 100644
--- a/src/gallium/drivers/r600/r600_video_context.c
+++ b/src/gallium/drivers/r600/r600_video_context.c
@@ -40,5 +40,5 @@ r600_video_create(struct pipe_screen *screen, void *priv)
    if (!pipe)
       return NULL;
 
-   return vl_create_context(pipe, false);
+   return vl_create_context(pipe);
 }
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index ac2e65b988e..f0467e9148d 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -171,6 +171,18 @@ softpipe_get_paramf(struct pipe_screen *screen, enum pipe_cap param)
    }
 }
 
+static int
+softpipe_get_video_param(struct pipe_screen *screen,
+                         enum pipe_video_profile profile,
+                         enum pipe_video_cap param)
+{  /* pipe_screen::get_video_param hook; screen and profile are not consulted */
+   switch (param) {
+   case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+      return 0; /* vl_context then rounds video buffer sizes up to powers of two */
+   default:
+      return 0; /* any other cap is reported as 0 */
+   }
+}
 
 /**
  * Query format support for creating a texture, drawing surface, etc.
@@ -299,7 +311,7 @@ sp_video_create(struct pipe_screen *screen, void *priv)
       return NULL;
 
    /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture with softpipe */
-   return vl_create_context(pipe, true);
+   return vl_create_context(pipe);
 }
 
 /**
@@ -324,6 +336,7 @@ softpipe_create_screen(struct sw_winsys *winsys)
    screen->base.get_param = softpipe_get_param;
    screen->base.get_shader_param = softpipe_get_shader_param;
    screen->base.get_paramf = softpipe_get_paramf;
+   screen->base.get_video_param = softpipe_get_video_param;
    screen->base.is_format_supported = softpipe_is_format_supported;
    screen->base.context_create = softpipe_create_context;
    screen->base.flush_frontbuffer = softpipe_flush_frontbuffer;
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 6c6641588d3..d8b1a9e171f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -493,6 +493,11 @@ enum pipe_shader_cap
    PIPE_SHADER_CAP_SUBROUTINES = 16, /* BGNSUB, ENDSUB, CAL, RET */
 };
 
+/* Video caps, can be different for each codec/profile */
+enum pipe_video_cap
+{
+   PIPE_VIDEO_CAP_NPOT_TEXTURES = 0,
+};
 
 enum pipe_video_codec
 {
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 4f95fa1b2e1..2fa469bbea7 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -92,6 +92,12 @@ struct pipe_screen {
     */
    int (*get_shader_param)( struct pipe_screen *, unsigned shader, enum pipe_shader_cap param );
 
+   /**
+    * Query an integer-valued capability/parameter/limit for a codec/profile
+    * \param param  one of PIPE_VIDEO_CAP_x
+    */
+   int (*get_video_param)( struct pipe_screen *, enum pipe_video_profile profile, enum pipe_video_cap param );
+
    struct pipe_context * (*context_create)( struct pipe_screen *, void *priv );
 
    struct pipe_video_context * (*video_context_create)( struct pipe_screen *screen, void *priv );
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index c9e618b6080..1fb635f1756 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -55,12 +55,6 @@ struct pipe_video_context
     */
    void (*destroy)(struct pipe_video_context *context);
 
-   /**
-    * Query an integer-valued capability/parameter/limit
-    * \param param  one of PIPE_CAP_x
-    */
-   int (*get_param)(struct pipe_video_context *context, int param);
-
    /**
     * Check if the given pipe_format is supported as a video buffer
     */
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 821c87e0dab..b4594ad5e08 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -232,7 +232,9 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    tex_templ.target = PIPE_TEXTURE_2D;
    tex_templ.format = XvIDToPipe(xvimage_id);
    tex_templ.last_level = 0;
-   if (vpipe->get_param(vpipe, PIPE_CAP_NPOT_TEXTURES)) {
+   if (vpipe->screen->get_video_param(vpipe->screen,
+                                      PIPE_VIDEO_PROFILE_UNKNOWN,
+                                      PIPE_VIDEO_CAP_NPOT_TEXTURES)) {
       tex_templ.width0 = width;
       tex_templ.height0 = height;
    }
-- 
cgit v1.2.3


From 7eca76952b6726be9459375dde7478a01789577e Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Jul 2011 11:20:39 +0200
Subject: [g3dvl] rename is_format_supported to is_video_format_supported and
 move it into screen object

---
 src/gallium/auxiliary/vl/vl_context.c      | 30 +-----------------------------
 src/gallium/auxiliary/vl/vl_video_buffer.c | 25 ++++++++++++++++++++++++-
 src/gallium/auxiliary/vl/vl_video_buffer.h | 11 ++++++++++-
 src/gallium/drivers/r300/r300_screen.c     |  2 ++
 src/gallium/drivers/r600/r600_pipe.c       |  2 ++
 src/gallium/drivers/softpipe/sp_screen.c   |  2 ++
 src/gallium/include/pipe/p_screen.h        |  8 ++++++++
 src/gallium/include/pipe/p_video_context.h |  7 -------
 8 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index 3a90a247bd8..46e1981cc9f 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -47,33 +47,6 @@ vl_context_destroy(struct pipe_video_context *context)
    FREE(ctx);
 }
 
-static boolean
-vl_context_is_format_supported(struct pipe_video_context *context,
-                               enum pipe_format format,
-                               enum pipe_video_profile profile)
-{
-   struct vl_context *ctx = (struct vl_context*)context;
-   const enum pipe_format *resource_formats;
-   unsigned i;
-
-   assert(context);
-
-   resource_formats = vl_video_buffer_formats(ctx->pipe, format);
-   if (!resource_formats)
-      return false;
-
-   for(i = 0; i < VL_MAX_PLANES; ++i) {
-      if (!resource_formats[i])
-         continue;
-
-      if (!ctx->pipe->screen->is_format_supported(ctx->pipe->screen, resource_formats[i],
-                                                  PIPE_TEXTURE_2D, 0, PIPE_USAGE_STATIC))
-         return false;
-   }
-
-   return true;
-}
-
 static struct pipe_surface *
 vl_context_create_surface(struct pipe_video_context *context,
                           struct pipe_resource *resource,
@@ -220,7 +193,7 @@ vl_context_create_buffer(struct pipe_video_context *context,
       PIPE_VIDEO_CAP_NPOT_TEXTURES
    );
 
-   resource_formats = vl_video_buffer_formats(ctx->pipe, buffer_format);
+   resource_formats = vl_video_buffer_formats(ctx->pipe->screen, buffer_format);
    if (!resource_formats)
       return NULL;
 
@@ -261,7 +234,6 @@ vl_create_context(struct pipe_context *pipe)
    ctx->base.screen = pipe->screen;
 
    ctx->base.destroy = vl_context_destroy;
-   ctx->base.is_format_supported = vl_context_is_format_supported;
    ctx->base.create_surface = vl_context_create_surface;
    ctx->base.create_sampler_view = vl_context_create_sampler_view;
    ctx->base.clear_sampler = vl_context_clear_sampler;
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index 93bc096b733..9b7bab47484 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -51,7 +51,7 @@ const enum pipe_format const_resource_formats_NV12[3] = {
 };
 
 const enum pipe_format *
-vl_video_buffer_formats(struct pipe_context *pipe, enum pipe_format format)
+vl_video_buffer_formats(struct pipe_screen *screen, enum pipe_format format)
 {
    switch(format) {
    case PIPE_FORMAT_YV12:
@@ -65,6 +65,29 @@ vl_video_buffer_formats(struct pipe_context *pipe, enum pipe_format format)
    }
 }
 
+boolean
+vl_video_buffer_is_format_supported(struct pipe_screen *screen,
+                                    enum pipe_format format,
+                                    enum pipe_video_profile profile)
+{
+   const enum pipe_format *resource_formats;
+   unsigned i;
+   /* profile is not consulted: support only depends on the per-plane formats */
+   resource_formats = vl_video_buffer_formats(screen, format);
+   if (!resource_formats)
+      return false;
+   /* every plane in use must be supported as a 2D texture; zero entries are unused planes */
+   for(i = 0; i < VL_MAX_PLANES; ++i) {
+      if (!resource_formats[i])
+         continue;
+      /* the last argument is a PIPE_BIND_* mask, not a PIPE_USAGE_* value; we at least need to sample from the planes */
+      if (!screen->is_format_supported(screen, resource_formats[i], PIPE_TEXTURE_2D, 0, PIPE_BIND_SAMPLER_VIEW))
+         return false;
+   }
+
+   return true;
+}
+
 static void
 vl_video_buffer_destroy(struct pipe_video_buffer *buffer)
 {
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
index 728c6f5f091..8755c54dc73 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.h
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -53,7 +53,16 @@ struct vl_video_buffer
  * get subformats for each plane
  */
 const enum pipe_format *
-vl_video_buffer_formats(struct pipe_context *pipe, enum pipe_format format);
+vl_video_buffer_formats(struct pipe_screen *screen, enum pipe_format format);
+
+/**
+ * check if video buffer format is supported for a codec/profile
+ * can be used as default implementation of screen->is_video_format_supported
+ */
+boolean
+vl_video_buffer_is_format_supported(struct pipe_screen *screen,
+                                    enum pipe_format format,
+                                    enum pipe_video_profile profile);
 
 /**
  * initialize a buffer, creating its resources
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index a440ecb8c39..53437d3ad08 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -25,6 +25,7 @@
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
 #include "os/os_time.h"
+#include "vl/vl_video_buffer.h"
 
 #include "r300_context.h"
 #include "r300_texture.h"
@@ -522,6 +523,7 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws)
     r300screen->screen.get_paramf = r300_get_paramf;
     r300screen->screen.get_video_param = r300_get_video_param;
     r300screen->screen.is_format_supported = r300_is_format_supported;
+    r300screen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
     r300screen->screen.context_create = r300_create_context;
     r300screen->screen.video_context_create = r300_video_create;
     r300screen->screen.fence_reference = r300_fence_reference;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index a25b6d0ff96..4b923f86704 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -38,6 +38,7 @@
 #include <util/u_memory.h>
 #include <util/u_inlines.h>
 #include "util/u_upload_mgr.h"
+#include <vl/vl_video_buffer.h>
 #include "os/os_time.h"
 #include <pipebuffer/pb_buffer.h>
 #include "r600.h"
@@ -667,6 +668,7 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon)
 	rscreen->screen.get_paramf = r600_get_paramf;
 	rscreen->screen.get_video_param = r600_get_video_param;
 	rscreen->screen.is_format_supported = r600_is_format_supported;
+	rscreen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
 	rscreen->screen.context_create = r600_create_context;
 	rscreen->screen.video_context_create = r600_video_create;
 	rscreen->screen.fence_reference = r600_fence_reference;
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index f0467e9148d..b978fb4f61e 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -34,6 +34,7 @@
 #include "pipe/p_screen.h"
 #include "draw/draw_context.h"
 #include "vl/vl_context.h"
+#include "vl/vl_video_buffer.h"
 
 #include "state_tracker/sw_winsys.h"
 #include "tgsi/tgsi_exec.h"
@@ -338,6 +339,7 @@ softpipe_create_screen(struct sw_winsys *winsys)
    screen->base.get_paramf = softpipe_get_paramf;
    screen->base.get_video_param = softpipe_get_video_param;
    screen->base.is_format_supported = softpipe_is_format_supported;
+   screen->base.is_video_format_supported = vl_video_buffer_is_format_supported;
    screen->base.context_create = softpipe_create_context;
    screen->base.flush_frontbuffer = softpipe_flush_frontbuffer;
    screen->base.video_context_create = sp_video_create;
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 2fa469bbea7..011724a79cc 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -112,6 +112,14 @@ struct pipe_screen {
                                    enum pipe_texture_target target,
                                    unsigned sample_count,
                                    unsigned bindings );
+                                   
+   /**
+    * Check if the given pipe_format is supported as output for this codec/profile.
+    * \param profile  profile to check, may also be PIPE_VIDEO_PROFILE_UNKNOWN
+    */
+   boolean (*is_video_format_supported)( struct pipe_screen *,
+                                         enum pipe_format format,
+                                         enum pipe_video_profile profile );
 
    /**
     * Create a new texture object, using the given template info.
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 1fb635f1756..78cf43e2edc 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -55,13 +55,6 @@ struct pipe_video_context
     */
    void (*destroy)(struct pipe_video_context *context);
 
-   /**
-    * Check if the given pipe_format is supported as a video buffer
-    */
-   boolean (*is_format_supported)(struct pipe_video_context *context,
-                                  enum pipe_format format,
-                                  enum pipe_video_profile profile);
-
    /**
     * create a surface of a texture
     */
-- 
cgit v1.2.3


From 2ec350ff1d9f13ec95d7b9d46f57ad9b9efcc8ea Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Jul 2011 12:03:13 +0200
Subject: [g3dvl] make pipe_context mandatory for creating a pipe_video_context

---
 src/gallium/auxiliary/vl/vl_context.c         |  2 --
 src/gallium/drivers/r300/Makefile             |  1 -
 src/gallium/drivers/r300/r300_screen.c        | 10 +++++-
 src/gallium/drivers/r300/r300_video_context.c | 38 -----------------------
 src/gallium/drivers/r300/r300_video_context.h | 30 ------------------
 src/gallium/drivers/r600/Makefile             |  1 -
 src/gallium/drivers/r600/r600_pipe.c          | 10 +++++-
 src/gallium/drivers/r600/r600_video_context.c | 44 ---------------------------
 src/gallium/drivers/r600/r600_video_context.h |  9 ------
 src/gallium/drivers/softpipe/sp_screen.c      | 11 ++-----
 src/gallium/include/pipe/p_screen.h           |  4 ++-
 src/gallium/winsys/g3dvl/dri/dri_winsys.c     | 23 +++++++++++---
 src/gallium/winsys/g3dvl/vl_winsys.h          |  1 +
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c    | 13 ++++++--
 14 files changed, 53 insertions(+), 144 deletions(-)
 delete mode 100644 src/gallium/drivers/r300/r300_video_context.c
 delete mode 100644 src/gallium/drivers/r300/r300_video_context.h
 delete mode 100644 src/gallium/drivers/r600/r600_video_context.c
 delete mode 100644 src/gallium/drivers/r600/r600_video_context.h

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index 46e1981cc9f..87de24c5402 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -42,8 +42,6 @@ vl_context_destroy(struct pipe_video_context *context)
 
    assert(context);
 
-   ctx->pipe->destroy(ctx->pipe);
-
    FREE(ctx);
 }
 
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index dfedf353877..4088216adcb 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -26,7 +26,6 @@ C_SOURCES = \
 	r300_texture.c \
 	r300_texture_desc.c \
 	r300_tgsi_to_rc.c \
-	r300_video_context.c \
 	r300_transfer.c
 
 LIBRARY_INCLUDES = \
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 53437d3ad08..854fc39e014 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -25,6 +25,7 @@
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
 #include "os/os_time.h"
+#include "vl/vl_context.h"
 #include "vl/vl_video_buffer.h"
 
 #include "r300_context.h"
@@ -32,7 +33,6 @@
 #include "r300_screen_buffer.h"
 #include "r300_state_inlines.h"
 #include "r300_public.h"
-#include "r300_video_context.h"
 
 #include "draw/draw_context.h"
 
@@ -425,6 +425,14 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
     return retval == usage;
 }
 
+static struct pipe_video_context *
+r300_video_create(struct pipe_screen *screen, struct pipe_context *pipe, void *priv)
+{
+   assert(screen);
+
+   return vl_create_context(pipe);
+}
+
 static void r300_destroy_screen(struct pipe_screen* pscreen)
 {
     struct r300_screen* r300screen = r300_screen(pscreen);
diff --git a/src/gallium/drivers/r300/r300_video_context.c b/src/gallium/drivers/r300/r300_video_context.c
deleted file mode 100644
index 697e45a5745..00000000000
--- a/src/gallium/drivers/r300/r300_video_context.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2009-2010  Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <vl/vl_context.h>
-#include <util/u_video.h>
-#include "r300_video_context.h"
-
-struct pipe_video_context *
-r300_video_create(struct pipe_screen *screen, void *priv)
-{
-   struct pipe_context *pipe;
-
-   assert(screen);
-
-   pipe = screen->context_create(screen, priv);
-   if (!pipe)
-      return NULL;
-
-   return vl_create_context(pipe);
-}
diff --git a/src/gallium/drivers/r300/r300_video_context.h b/src/gallium/drivers/r300/r300_video_context.h
deleted file mode 100644
index aaae14cec47..00000000000
--- a/src/gallium/drivers/r300/r300_video_context.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2009-2010  Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef __R300_VIDEO_CONTEXT_H__
-#define __R300_VIDEO_CONTEXT_H__
-
-#include <pipe/p_video_context.h>
-
-struct pipe_video_context *
-r300_video_create(struct pipe_screen *screen, void *priv);
-
-#endif
diff --git a/src/gallium/drivers/r600/Makefile b/src/gallium/drivers/r600/Makefile
index 3dda3a6339f..7e21e3e32b1 100644
--- a/src/gallium/drivers/r600/Makefile
+++ b/src/gallium/drivers/r600/Makefile
@@ -16,7 +16,6 @@ C_SOURCES = \
 	r600_shader.c \
 	r600_state.c \
 	r600_texture.c \
-	r600_video_context.c \
 	r700_asm.c \
 	evergreen_state.c \
 	eg_asm.c \
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 4b923f86704..9e0b0ea3e49 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -38,6 +38,7 @@
 #include <util/u_memory.h>
 #include <util/u_inlines.h>
 #include "util/u_upload_mgr.h"
+#include <vl/vl_context.h>
 #include <vl/vl_video_buffer.h>
 #include "os/os_time.h"
 #include <pipebuffer/pb_buffer.h>
@@ -47,7 +48,6 @@
 #include "r600_shader.h"
 #include "r600_pipe.h"
 #include "r600_state_inlines.h"
-#include "r600_video_context.h"
 
 /*
  * pipe_context
@@ -301,6 +301,14 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 	return &rctx->context;
 }
 
+static struct pipe_video_context *
+r600_video_create(struct pipe_screen *screen, struct pipe_context *pipe, void *priv)
+{
+	assert(screen && pipe);
+
+	return vl_create_context(pipe);
+}
+
 /*
  * pipe_screen
  */
diff --git a/src/gallium/drivers/r600/r600_video_context.c b/src/gallium/drivers/r600/r600_video_context.c
deleted file mode 100644
index 5f0d5f5baec..00000000000
--- a/src/gallium/drivers/r600/r600_video_context.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2010 Christian König
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "r600_video_context.h"
-#include "util/u_video.h"
-#include <vl/vl_context.h>
-
-struct pipe_video_context *
-r600_video_create(struct pipe_screen *screen, void *priv)
-{
-   struct pipe_context *pipe;
-
-   assert(screen);
-
-   pipe = screen->context_create(screen, priv);
-   if (!pipe)
-      return NULL;
-
-   return vl_create_context(pipe);
-}
diff --git a/src/gallium/drivers/r600/r600_video_context.h b/src/gallium/drivers/r600/r600_video_context.h
deleted file mode 100644
index f579980bd36..00000000000
--- a/src/gallium/drivers/r600/r600_video_context.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __R600_VIDEO_CONTEXT_H__
-#define __R600_VIDEO_CONTEXT_H__
-
-#include <pipe/p_video_context.h>
-
-struct pipe_video_context *
-r600_video_create(struct pipe_screen *screen, void *priv);
-
-#endif
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index b978fb4f61e..2a5485209d1 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -301,18 +301,11 @@ softpipe_flush_frontbuffer(struct pipe_screen *_screen,
 }
 
 static struct pipe_video_context *
-sp_video_create(struct pipe_screen *screen, void *priv)
+sp_video_create(struct pipe_screen *screen, struct pipe_context *context, void *priv)
 {
-   struct pipe_context *pipe;
-
    assert(screen);
 
-   pipe = screen->context_create(screen, NULL);
-   if (!pipe)
-      return NULL;
-
-   /* TODO: Use slice buffering for softpipe when implemented, no advantage to buffering an entire picture with softpipe */
-   return vl_create_context(pipe);
+   return vl_create_context(context);
 }
 
 /**
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 011724a79cc..32869bb71e8 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -100,7 +100,9 @@ struct pipe_screen {
 
    struct pipe_context * (*context_create)( struct pipe_screen *, void *priv );
 
-   struct pipe_video_context * (*video_context_create)( struct pipe_screen *screen, void *priv );
+   struct pipe_video_context * (*video_context_create)( struct pipe_screen *screen,
+                                                        struct pipe_context *context,
+                                                        void *priv );
 
    /**
     * Check if the given pipe_format is supported as a texture or
diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index ffb94de4a7b..42b33d191d4 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -237,21 +237,31 @@ vl_video_create(struct vl_screen *vscreen)
    struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vscreen;
    struct vl_dri_context *vl_dri_ctx;
 
+   if (!vscreen->pscreen->video_context_create) {
+      debug_printf("[G3DVL] No video support found on %s/%s.\n",
+                   vscreen->pscreen->get_vendor(vscreen->pscreen),
+                   vscreen->pscreen->get_name(vscreen->pscreen));
+      goto no_vpipe;
+   }
+
    vl_dri_ctx = CALLOC_STRUCT(vl_dri_context);
    if (!vl_dri_ctx)
       goto no_struct;
 
-   if (!vscreen->pscreen->video_context_create) {
+   vl_dri_ctx->base.pipe = vscreen->pscreen->context_create(vscreen->pscreen, vl_dri_ctx);
+   if (!vl_dri_ctx->base.pipe) {
       debug_printf("[G3DVL] No video support found on %s/%s.\n",
                    vscreen->pscreen->get_vendor(vscreen->pscreen),
                    vscreen->pscreen->get_name(vscreen->pscreen));
-      goto no_vpipe;
+      goto no_pipe;
    }
 
-   vl_dri_ctx->base.vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen, vl_dri_ctx);
+   vl_dri_ctx->base.vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen,
+                                                                   vl_dri_ctx->base.pipe,
+                                                                   vl_dri_ctx);
 
    if (!vl_dri_ctx->base.vpipe)
-      goto no_vpipe;
+      goto no_pipe;
 
    vl_dri_ctx->base.vpipe->priv = vl_dri_ctx;
    vl_dri_ctx->base.vscreen = vscreen;
@@ -259,9 +269,11 @@ vl_video_create(struct vl_screen *vscreen)
 
    return &vl_dri_ctx->base;
 
-no_vpipe:
+no_pipe:
    FREE(vl_dri_ctx);
+
 no_struct:
+no_vpipe:
    return NULL;
 }
 
@@ -271,6 +283,7 @@ void vl_video_destroy(struct vl_context *vctx)
 
    assert(vctx);
 
+   vl_dri_ctx->base.pipe->destroy(vl_dri_ctx->base.pipe);
    vl_dri_ctx->base.vpipe->destroy(vl_dri_ctx->base.vpipe);
    FREE(vl_dri_ctx);
 }
diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h
index 152a4a62292..2d80c1d9b32 100644
--- a/src/gallium/winsys/g3dvl/vl_winsys.h
+++ b/src/gallium/winsys/g3dvl/vl_winsys.h
@@ -44,6 +44,7 @@ struct vl_screen
 struct vl_context
 {
    struct vl_screen *vscreen;
+   struct pipe_context *pipe;
    struct pipe_video_context *vpipe;
 };
 
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 1a67e2436e4..3caf6603243 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -172,18 +172,26 @@ void vl_screen_destroy(struct vl_screen *vscreen)
 struct vl_context*
 vl_video_create(struct vl_screen *vscreen)
 {
+   struct pipe_video_context *pipe;
    struct pipe_video_context *vpipe;
    struct vl_context *vctx;
 
    assert(vscreen);
    assert(vscreen->pscreen->video_context_create);
 
-   vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen, NULL);
-   if (!vpipe)
+   pipe = vscreen->pscreen->context_create(vscreen->pscreen, NULL);
+   if (!pipe)
       return NULL;
 
+   vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen, pipe, NULL);
+   if (!vpipe) {
+      pipe->destroy(pipe);
+      return NULL;
+   }
+
    vctx = CALLOC_STRUCT(vl_context);
    if (!vctx) {
+      pipe->destroy(pipe);
       vpipe->destroy(vpipe);
       return NULL;
    }
@@ -199,6 +207,7 @@ void vl_video_destroy(struct vl_context *vctx)
 {
    assert(vctx);
 
+   vctx->pipe->destroy(vctx->pipe);
    vctx->vpipe->destroy(vctx->vpipe);
    FREE(vctx);
 }
-- 
cgit v1.2.3


From 06ddbc3b8e58a6cf22708263a8b7d16cf1db5dbc Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Jul 2011 12:15:48 +0200
Subject: [g3dvl] remove create_surface from video context

---
 src/gallium/auxiliary/vl/vl_context.c      | 13 -------------
 src/gallium/include/pipe/p_video_context.h |  7 -------
 src/gallium/state_trackers/vdpau/output.c  |  9 ++++++---
 src/gallium/winsys/g3dvl/dri/dri_winsys.c  |  2 +-
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c |  6 +++---
 5 files changed, 10 insertions(+), 27 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index 87de24c5402..b3340cbe256 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -45,18 +45,6 @@ vl_context_destroy(struct pipe_video_context *context)
    FREE(ctx);
 }
 
-static struct pipe_surface *
-vl_context_create_surface(struct pipe_video_context *context,
-                          struct pipe_resource *resource,
-                          const struct pipe_surface *templ)
-{
-   struct vl_context *ctx = (struct vl_context*)context;
-
-   assert(ctx);
-
-   return ctx->pipe->create_surface(ctx->pipe, resource, templ);
-}
-
 static struct pipe_sampler_view *
 vl_context_create_sampler_view(struct pipe_video_context *context,
                                struct pipe_resource *resource,
@@ -232,7 +220,6 @@ vl_create_context(struct pipe_context *pipe)
    ctx->base.screen = pipe->screen;
 
    ctx->base.destroy = vl_context_destroy;
-   ctx->base.create_surface = vl_context_create_surface;
    ctx->base.create_sampler_view = vl_context_create_sampler_view;
    ctx->base.clear_sampler = vl_context_clear_sampler;
    ctx->base.upload_sampler = vl_context_upload_sampler;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 78cf43e2edc..567a892e830 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -55,13 +55,6 @@ struct pipe_video_context
     */
    void (*destroy)(struct pipe_video_context *context);
 
-   /**
-    * create a surface of a texture
-    */
-   struct pipe_surface *(*create_surface)(struct pipe_video_context *context,
-                                          struct pipe_resource *resource,
-                                          const struct pipe_surface *templ);
-
    /**
     * sampler view handling, used for subpictures for example
     */
diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index 72e63fc2f32..b45f699b83f 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -30,6 +30,7 @@
 
 #include <util/u_debug.h>
 #include <util/u_memory.h>
+#include <util/u_sampler.h>
 
 #include "vdpau_private.h"
 
@@ -39,6 +40,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
                          uint32_t width, uint32_t height,
                          VdpOutputSurface  *surface)
 {
+   struct pipe_context *pipe;
    struct pipe_video_context *context;
    struct pipe_resource res_tmpl, *res;
    struct pipe_sampler_view sv_templ;
@@ -54,8 +56,9 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
 
+   pipe = dev->context->pipe;
    context = dev->context->vpipe;
-   if (!context)
+   if (!pipe || !context)
       return VDP_STATUS_INVALID_HANDLE;
 
    vlsurface = CALLOC(1, sizeof(vlVdpOutputSurface));
@@ -85,7 +88,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
    // as long as we don't have a background picture we don't want an alpha channel
    sv_templ.swizzle_a = PIPE_SWIZZLE_ONE;
 
-   vlsurface->sampler_view = context->create_sampler_view(context, res, &sv_templ);
+   vlsurface->sampler_view = pipe->create_sampler_view(pipe, res, &sv_templ);
    if (!vlsurface->sampler_view) {
       FREE(dev);
       return VDP_STATUS_ERROR;
@@ -94,7 +97,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
    memset(&surf_templ, 0, sizeof(surf_templ));
    surf_templ.format = res->format;
    surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
-   vlsurface->surface = context->create_surface(context, res, &surf_templ);
+   vlsurface->surface = pipe->create_surface(pipe, res, &surf_templ);
    if (!vlsurface->surface) {
       FREE(dev);
       return VDP_STATUS_ERROR;
diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index 42b33d191d4..29b7d3ea468 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -95,7 +95,7 @@ vl_dri2_get_front(struct vl_context *vctx, Drawable drawable)
          memset(&surf_template, 0, sizeof(surf_template));
          surf_template.format = front_tex->format;
          surf_template.usage = PIPE_BIND_RENDER_TARGET;
-         front_surf = vctx->vpipe->create_surface(vctx->vpipe, front_tex, &surf_template);
+         front_surf = vctx->pipe->create_surface(vctx->pipe, front_tex, &surf_template);
       }
       pipe_resource_reference(&front_tex, NULL);
       Xfree(dri2_front);
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 3caf6603243..5cea6196cf7 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -99,8 +99,8 @@ vl_drawable_surface_get(struct vl_context *vctx, Drawable drawable)
    memset(&surf_template, 0, sizeof(surf_template));
    surf_template.format = templat.format;
    surf_template.usage = PIPE_BIND_RENDER_TARGET;
-   xsp_screen->drawable_surface = vctx->vpipe->create_surface(vctx->vpipe, drawable_tex,
-                                                              &surf_template);
+   xsp_screen->drawable_surface = vctx->pipe->create_surface(vctx->pipe, drawable_tex,
+                                                             &surf_template);
    pipe_resource_reference(&drawable_tex, NULL);
 
    if (!xsp_screen->drawable_surface)
@@ -172,7 +172,7 @@ void vl_screen_destroy(struct vl_screen *vscreen)
 struct vl_context*
 vl_video_create(struct vl_screen *vscreen)
 {
-   struct pipe_video_context *pipe;
+   struct pipe_context *pipe;
    struct pipe_video_context *vpipe;
    struct vl_context *vctx;
 
-- 
cgit v1.2.3


From 10fd45114d4a7bbac4093755305ea5e4ba3ab6a5 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Jul 2011 12:47:52 +0200
Subject: [g3dvl] remove sampler view handling from video context

---
 src/gallium/auxiliary/vl/vl_context.c             | 86 -----------------------
 src/gallium/include/pipe/p_video_context.h        | 31 --------
 src/gallium/state_trackers/vdpau/surface.c        | 24 ++++++-
 src/gallium/state_trackers/xorg/xvmc/subpicture.c | 71 +++++++++++++++----
 4 files changed, 79 insertions(+), 133 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index b3340cbe256..1ac0d9c3050 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -45,89 +45,6 @@ vl_context_destroy(struct pipe_video_context *context)
    FREE(ctx);
 }
 
-static struct pipe_sampler_view *
-vl_context_create_sampler_view(struct pipe_video_context *context,
-                               struct pipe_resource *resource,
-                               const struct pipe_sampler_view *templ)
-{
-   struct vl_context *ctx = (struct vl_context*)context;
-
-   assert(ctx);
-
-   return ctx->pipe->create_sampler_view(ctx->pipe, resource, templ);
-}
-
-static void
-vl_context_upload_sampler(struct pipe_video_context *context,
-                          struct pipe_sampler_view *dst,
-                          const struct pipe_box *dst_box,
-                          const void *src, unsigned src_stride,
-                          unsigned src_x, unsigned src_y)
-{
-   struct vl_context *ctx = (struct vl_context*)context;
-   struct pipe_transfer *transfer;
-   void *map;
-
-   assert(context);
-   assert(dst);
-   assert(dst_box);
-   assert(src);
-
-   transfer = ctx->pipe->get_transfer(ctx->pipe, dst->texture, 0, PIPE_TRANSFER_WRITE, dst_box);
-   if (!transfer)
-      return;
-
-   map = ctx->pipe->transfer_map(ctx->pipe, transfer);
-   if (!transfer)
-      goto error_map;
-
-   util_copy_rect(map, dst->texture->format, transfer->stride, 0, 0,
-                  dst_box->width, dst_box->height,
-                  src, src_stride, src_x, src_y);
-
-   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
-
-error_map:
-   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
-}
-
-static void
-vl_context_clear_sampler(struct pipe_video_context *context,
-                         struct pipe_sampler_view *dst,
-                         const struct pipe_box *dst_box,
-                         const float *rgba)
-{
-   struct vl_context *ctx = (struct vl_context*)context;
-   struct pipe_transfer *transfer;
-   union util_color uc;
-   void *map;
-   unsigned i;
-
-   assert(context);
-   assert(dst);
-   assert(dst_box);
-   assert(rgba);
-
-   transfer = ctx->pipe->get_transfer(ctx->pipe, dst->texture, 0, PIPE_TRANSFER_WRITE, dst_box);
-   if (!transfer)
-      return;
-
-   map = ctx->pipe->transfer_map(ctx->pipe, transfer);
-   if (!transfer)
-      goto error_map;
-
-   for ( i = 0; i < 4; ++i)
-      uc.f[i] = rgba[i];
-
-   util_fill_rect(map, dst->texture->format, transfer->stride, 0, 0,
-                  dst_box->width, dst_box->height, &uc);
-
-   ctx->pipe->transfer_unmap(ctx->pipe, transfer);
-
-error_map:
-   ctx->pipe->transfer_destroy(ctx->pipe, transfer);
-}
-
 static struct pipe_video_decoder *
 vl_context_create_decoder(struct pipe_video_context *context,
                           enum pipe_video_profile profile,
@@ -220,9 +137,6 @@ vl_create_context(struct pipe_context *pipe)
    ctx->base.screen = pipe->screen;
 
    ctx->base.destroy = vl_context_destroy;
-   ctx->base.create_sampler_view = vl_context_create_sampler_view;
-   ctx->base.clear_sampler = vl_context_clear_sampler;
-   ctx->base.upload_sampler = vl_context_upload_sampler;
    ctx->base.create_decoder = vl_context_create_decoder;
    ctx->base.create_buffer = vl_context_create_buffer;
    ctx->base.create_compositor = vl_context_create_compositor;
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 567a892e830..dfc383055a7 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -55,37 +55,6 @@ struct pipe_video_context
     */
    void (*destroy)(struct pipe_video_context *context);
 
-   /**
-    * sampler view handling, used for subpictures for example
-    */
-   /*@{*/
-
-   /**
-    * create a sampler view of a texture, for subpictures for example
-    */
-   struct pipe_sampler_view *(*create_sampler_view)(struct pipe_video_context *context,
-                                                    struct pipe_resource *resource,
-                                                    const struct pipe_sampler_view *templ);
-
-   /**
-    * upload image data to a sampler
-    */
-   void (*upload_sampler)(struct pipe_video_context *context,
-                          struct pipe_sampler_view *dst,
-                          const struct pipe_box *dst_box,
-                          const void *src, unsigned src_stride,
-                          unsigned src_x, unsigned src_y);
-
-   /**
-    * clear a sampler with a specific rgba color
-    */
-   void (*clear_sampler)(struct pipe_video_context *context,
-                         struct pipe_sampler_view *dst,
-                         const struct pipe_box *dst_box,
-                         const float *rgba);
-
-   /*}@*/
-
    /**
     * create a decoder for a specific video profile
     */
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index c2945c787da..f20087f3fca 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -33,6 +33,7 @@
 
 #include <util/u_memory.h>
 #include <util/u_debug.h>
+#include <util/u_rect.h>
 
 #include "vdpau_private.h"
 
@@ -159,6 +160,7 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
                               uint32_t const *source_pitches)
 {
    enum pipe_format pformat = FormatToPipe(source_ycbcr_format);
+   struct pipe_context *pipe;
    struct pipe_video_context *context;
    struct pipe_sampler_view **sampler_views;
    unsigned i;
@@ -170,8 +172,9 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
    if (!p_surf)
       return VDP_STATUS_INVALID_HANDLE;
 
+   pipe = p_surf->device->context->pipe;
    context = p_surf->device->context->vpipe;
-   if (!context)
+   if (!pipe && !context)
       return VDP_STATUS_INVALID_HANDLE;
 
    if (p_surf->video_buffer == NULL || pformat != p_surf->video_buffer->buffer_format) {
@@ -186,7 +189,24 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
    for (i = 0; i < 3; ++i) { //TODO put nr of planes into util format
       struct pipe_sampler_view *sv = sampler_views[i ? i ^ 3 : 0];
       struct pipe_box dst_box = { 0, 0, 0, sv->texture->width0, sv->texture->height0, 1 };
-      context->upload_sampler(context, sv, &dst_box, source_data[i], source_pitches[i], 0, 0);
+
+      struct pipe_transfer *transfer;
+      void *map;
+
+      transfer = pipe->get_transfer(pipe, sv->texture, 0, PIPE_TRANSFER_WRITE, &dst_box);
+      if (!transfer)
+         return VDP_STATUS_RESOURCES;
+
+      map = pipe->transfer_map(pipe, transfer);
+      if (map) {
+         util_copy_rect(map, sv->texture->format, transfer->stride, 0, 0,
+                        dst_box.width, dst_box.height,
+                        source_data[i], source_pitches[i], 0, 0);
+
+         pipe->transfer_unmap(pipe, transfer);
+      }
+
+      pipe->transfer_destroy(pipe, transfer);
    }
 
    return VDP_STATUS_OK;
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index b4594ad5e08..4ecb0e1f887 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -39,6 +39,7 @@
 #include <util/u_math.h>
 #include <util/u_format.h>
 #include <util/u_sampler.h>
+#include <util/u_rect.h>
 
 #include <vl_winsys.h>
 
@@ -192,12 +193,37 @@ static Status Validate(Display *dpy, XvPortID port, int surface_type_id, int xvi
    return i < num_subpics ? Success : BadMatch;
 }
 
+static void
+upload_sampler(struct pipe_context *pipe, struct pipe_sampler_view *dst,
+               const struct pipe_box *dst_box, const void *src, unsigned src_stride,
+               unsigned src_x, unsigned src_y)
+{
+   struct pipe_transfer *transfer;
+   void *map;
+
+   transfer = pipe->get_transfer(pipe, dst->texture, 0, PIPE_TRANSFER_WRITE, dst_box);
+   if (!transfer)
+      return;
+
+   map = pipe->transfer_map(pipe, transfer);
+   if (map) {
+      util_copy_rect(map, dst->texture->format, transfer->stride, 0, 0,
+                     dst_box->width, dst_box->height,
+                     src, src_stride, src_x, src_y);
+
+      pipe->transfer_unmap(pipe, transfer);
+   }
+
+   pipe->transfer_destroy(pipe, transfer);
+}
+
 PUBLIC
 Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *subpicture,
                             unsigned short width, unsigned short height, int xvimage_id)
 {
    XvMCContextPrivate *context_priv;
    XvMCSubpicturePrivate *subpicture_priv;
+   struct pipe_context *pipe;
    struct pipe_video_context *vpipe;
    struct pipe_resource tex_templ, *tex;
    struct pipe_sampler_view sampler_templ;
@@ -211,6 +237,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
       return XvMCBadContext;
 
    context_priv = context->privData;
+   pipe = context_priv->vctx->pipe;
    vpipe = context_priv->vctx->vpipe;
 
    if (!subpicture)
@@ -254,7 +281,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    u_sampler_view_default_template(&sampler_templ, tex, tex->format);
    XvIDToSwizzle(xvimage_id, &sampler_templ);
 
-   subpicture_priv->sampler = vpipe->create_sampler_view(vpipe, tex, &sampler_templ);
+   subpicture_priv->sampler = pipe->create_sampler_view(pipe, tex, &sampler_templ);
    pipe_resource_reference(&tex, NULL);
    if (!subpicture_priv->sampler) {
       FREE(subpicture_priv);
@@ -283,7 +310,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
       memset(&sampler_templ, 0, sizeof(sampler_templ));
       u_sampler_view_default_template(&sampler_templ, tex, tex->format);
       sampler_templ.swizzle_a = PIPE_SWIZZLE_ONE;
-      subpicture_priv->palette = vpipe->create_sampler_view(vpipe, tex, &sampler_templ);
+      subpicture_priv->palette = pipe->create_sampler_view(pipe, tex, &sampler_templ);
       pipe_resource_reference(&tex, NULL);
       if (!subpicture_priv->sampler) {
          FREE(subpicture_priv);
@@ -304,8 +331,12 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh
 {
    XvMCSubpicturePrivate *subpicture_priv;
    XvMCContextPrivate *context_priv;
+   struct pipe_context *pipe;
+   struct pipe_sampler_view *dst;
    struct pipe_box dst_box = {x, y, 0, width, height, 1};
-   float color_f[4];
+   struct pipe_transfer *transfer;
+   union util_color uc;
+   void *map;
 
    assert(dpy);
 
@@ -314,15 +345,28 @@ Status XvMCClearSubpicture(Display *dpy, XvMCSubpicture *subpicture, short x, sh
 
    /* Convert color to float */
    util_format_read_4f(PIPE_FORMAT_B8G8R8A8_UNORM,
-                       color_f, 1, &color, 4,
+                       uc.f, 1, &color, 4,
                        0, 0, 1, 1);
 
    subpicture_priv = subpicture->privData;
    context_priv = subpicture_priv->context->privData;
+   pipe = context_priv->vctx->pipe;
+   dst = subpicture_priv->sampler;
+   
    /* TODO: Assert clear rect is within bounds? Or clip? */
-   context_priv->vctx->vpipe->clear_sampler(context_priv->vctx->vpipe,
-                                            subpicture_priv->sampler, &dst_box,
-                                            color_f);
+   transfer = pipe->get_transfer(pipe, dst->texture, 0, PIPE_TRANSFER_WRITE, &dst_box);
+   if (!transfer)
+      return XvMCBadSubpicture;
+
+   map = pipe->transfer_map(pipe, transfer);
+   if (map) {
+      util_fill_rect(map, dst->texture->format, transfer->stride, 0, 0,
+                     dst_box.width, dst_box.height, &uc);
+
+      pipe->transfer_unmap(pipe, transfer);
+   }
+
+   pipe->transfer_destroy(pipe, transfer);
 
    return Success;
 }
@@ -334,7 +378,7 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
 {
    XvMCSubpicturePrivate *subpicture_priv;
    XvMCContextPrivate *context_priv;
-   struct pipe_video_context *vpipe;
+   struct pipe_context *pipe;
    struct pipe_box dst_box = {dstx, dsty, 0, width, height, 1};
    unsigned src_stride;
 
@@ -356,13 +400,12 @@ Status XvMCCompositeSubpicture(Display *dpy, XvMCSubpicture *subpicture, XvImage
 
    subpicture_priv = subpicture->privData;
    context_priv = subpicture_priv->context->privData;
-   vpipe = context_priv->vctx->vpipe;
+   pipe = context_priv->vctx->pipe;
 
    /* clipping should be done by upload_sampler and regardles what the documentation
    says image->pitches[0] doesn't seems to be in bytes, so don't use it */
    src_stride = image->width * util_format_get_blocksize(subpicture_priv->sampler->texture->format);
-   vpipe->upload_sampler(vpipe, subpicture_priv->sampler, &dst_box,
-                         image->data, src_stride, srcx, srcy);
+   upload_sampler(pipe, subpicture_priv->sampler, &dst_box, image->data, src_stride, srcx, srcy);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Subpicture %p composited.\n", subpicture);
 
@@ -396,7 +439,7 @@ Status XvMCSetSubpicturePalette(Display *dpy, XvMCSubpicture *subpicture, unsign
 {
    XvMCSubpicturePrivate *subpicture_priv;
    XvMCContextPrivate *context_priv;
-   struct pipe_video_context *vpipe;
+   struct pipe_context *pipe;
    struct pipe_box dst_box = {0, 0, 0, 0, 1, 1};
 
    assert(dpy);
@@ -407,11 +450,11 @@ Status XvMCSetSubpicturePalette(Display *dpy, XvMCSubpicture *subpicture, unsign
 
    subpicture_priv = subpicture->privData;
    context_priv = subpicture_priv->context->privData;
-   vpipe = context_priv->vctx->vpipe;
+   pipe = context_priv->vctx->pipe;
 
    dst_box.width = subpicture->num_palette_entries;
 
-   vpipe->upload_sampler(vpipe, subpicture_priv->palette, &dst_box, palette, 0, 0, 0);
+   upload_sampler(pipe, subpicture_priv->palette, &dst_box, palette, 0, 0, 0);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Palette of Subpicture %p set.\n", subpicture);
 
-- 
cgit v1.2.3


From bd5fd67a3e3cda4b7676dd4745fc5d5524709210 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Jul 2011 14:44:19 +0200
Subject: [g3dvl] move compositor creation and handling directly into the state
 trackers

---
 src/gallium/auxiliary/vl/vl_compositor.c           | 112 ++++++++-------------
 src/gallium/auxiliary/vl/vl_compositor.h           |  90 ++++++++++++++++-
 src/gallium/auxiliary/vl/vl_context.c              |  11 --
 src/gallium/include/pipe/p_video_context.h         |  83 ---------------
 src/gallium/state_trackers/vdpau/mixer.c           |  14 +--
 src/gallium/state_trackers/vdpau/presentation.c    |  16 +--
 src/gallium/state_trackers/vdpau/vdpau_private.h   |   5 +-
 src/gallium/state_trackers/xorg/xvmc/attributes.c  |   2 +-
 src/gallium/state_trackers/xorg/xvmc/context.c     |   7 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c     |  20 ++--
 .../state_trackers/xorg/xvmc/xvmc_private.h        |   4 +-
 11 files changed, 164 insertions(+), 200 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 83c93637219..78b8d0627ce 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -500,36 +500,32 @@ draw_layers(struct vl_compositor *c)
    }
 }
 
-static void
-vl_compositor_reset_dirty_area(struct pipe_video_compositor *compositor)
+void
+vl_compositor_reset_dirty_area(struct vl_compositor *c)
 {
-   struct vl_compositor *c = (struct vl_compositor *)compositor;
-
-   assert(compositor);
+   assert(c);
 
    c->dirty_tl.x = c->dirty_tl.y = 0.0f;
    c->dirty_br.x = c->dirty_br.y = 1.0f;
 }
 
-static void
-vl_compositor_set_clear_color(struct pipe_video_compositor *compositor, float color[4])
+void
+vl_compositor_set_clear_color(struct vl_compositor *c, float color[4])
 {
-   struct vl_compositor *c = (struct vl_compositor *)compositor;
    unsigned i;
 
-   assert(compositor);
+   assert(c);
 
    for (i = 0; i < 4; ++i)
       c->clear_color[i] = color[i];
 }
 
-static void
-vl_compositor_clear_layers(struct pipe_video_compositor *compositor)
+void
+vl_compositor_clear_layers(struct vl_compositor *c)
 {
-   struct vl_compositor *c = (struct vl_compositor *)compositor;
    unsigned i, j;
 
-   assert(compositor);
+   assert(c);
 
    c->used_layers = 0;
    for ( i = 0; i < VL_COMPOSITOR_MAX_LAYERS; ++i) {
@@ -539,28 +535,24 @@ vl_compositor_clear_layers(struct pipe_video_compositor *compositor)
    }
 }
 
-static void
-vl_compositor_destroy(struct pipe_video_compositor *compositor)
+void
+vl_compositor_cleanup(struct vl_compositor *c)
 {
-   struct vl_compositor *c = (struct vl_compositor *)compositor;
-   assert(compositor);
+   assert(c);
 
-   vl_compositor_clear_layers(compositor);
+   vl_compositor_clear_layers(c);
 
    cleanup_buffers(c);
    cleanup_shaders(c);
    cleanup_pipe_state(c);
-
-   FREE(compositor);
 }
 
-static void
-vl_compositor_set_csc_matrix(struct pipe_video_compositor *compositor, const float matrix[16])
+void
+vl_compositor_set_csc_matrix(struct vl_compositor *c, const float matrix[16])
 {
-   struct vl_compositor *c = (struct vl_compositor *)compositor;
    struct pipe_transfer *buf_transfer;
 
-   assert(compositor);
+   assert(c);
 
    memcpy
    (
@@ -574,18 +566,17 @@ vl_compositor_set_csc_matrix(struct pipe_video_compositor *compositor, const flo
    pipe_buffer_unmap(c->pipe, buf_transfer);
 }
 
-static void
-vl_compositor_set_buffer_layer(struct pipe_video_compositor *compositor,
+void
+vl_compositor_set_buffer_layer(struct vl_compositor *c,
                                unsigned layer,
                                struct pipe_video_buffer *buffer,
                                struct pipe_video_rect *src_rect,
                                struct pipe_video_rect *dst_rect)
 {
-   struct vl_compositor *c = (struct vl_compositor *)compositor;
    struct pipe_sampler_view **sampler_views;
    unsigned i;
 
-   assert(compositor && buffer);
+   assert(c && buffer);
 
    assert(layer < VL_COMPOSITOR_MAX_LAYERS);
 
@@ -604,16 +595,15 @@ vl_compositor_set_buffer_layer(struct pipe_video_compositor *compositor,
                     dst_rect ? *dst_rect : default_rect(&c->layers[layer]));
 }
 
-static void
-vl_compositor_set_palette_layer(struct pipe_video_compositor *compositor,
+void
+vl_compositor_set_palette_layer(struct vl_compositor *c,
                                 unsigned layer,
                                 struct pipe_sampler_view *indexes,
                                 struct pipe_sampler_view *palette,
                                 struct pipe_video_rect *src_rect,
                                 struct pipe_video_rect *dst_rect)
 {
-   struct vl_compositor *c = (struct vl_compositor *)compositor;
-   assert(compositor && indexes && palette);
+   assert(c && indexes && palette);
 
    assert(layer < VL_COMPOSITOR_MAX_LAYERS);
 
@@ -632,15 +622,14 @@ vl_compositor_set_palette_layer(struct pipe_video_compositor *compositor,
 
 }
 
-static void
-vl_compositor_set_rgba_layer(struct pipe_video_compositor *compositor,
+void
+vl_compositor_set_rgba_layer(struct vl_compositor *c,
                              unsigned layer,
                              struct pipe_sampler_view *rgba,
                              struct pipe_video_rect *src_rect,
                              struct pipe_video_rect *dst_rect)
 {
-   struct vl_compositor *c = (struct vl_compositor *)compositor;
-   assert(compositor && rgba);
+   assert(c && rgba);
 
    assert(layer < VL_COMPOSITOR_MAX_LAYERS);
 
@@ -658,17 +647,16 @@ vl_compositor_set_rgba_layer(struct pipe_video_compositor *compositor,
                     dst_rect ? *dst_rect : default_rect(&c->layers[layer]));
 }
 
-static void
-vl_compositor_render(struct pipe_video_compositor *compositor,
+void
+vl_compositor_render(struct vl_compositor *c,
                      enum pipe_mpeg12_picture_type picture_type,
                      struct pipe_surface           *dst_surface,
                      struct pipe_video_rect        *dst_area,
                      struct pipe_fence_handle      **fence)
 {
-   struct vl_compositor *c = (struct vl_compositor *)compositor;
    struct pipe_scissor_state scissor;
 
-   assert(compositor);
+   assert(c);
    assert(dst_surface);
 
    c->fb_state.width = dst_surface->width;
@@ -713,48 +701,34 @@ vl_compositor_render(struct pipe_video_compositor *compositor,
    c->pipe->flush(c->pipe, fence);
 }
 
-struct pipe_video_compositor *
-vl_compositor_init(struct pipe_video_context *vpipe, struct pipe_context *pipe)
+bool
+vl_compositor_init(struct vl_compositor *c, struct pipe_context *pipe)
 {
    csc_matrix csc_matrix;
-   struct vl_compositor *compositor;
-
-   compositor = CALLOC_STRUCT(vl_compositor);
 
-   compositor->base.context = vpipe;
-   compositor->base.destroy = vl_compositor_destroy;
-   compositor->base.set_csc_matrix = vl_compositor_set_csc_matrix;
-   compositor->base.reset_dirty_area = vl_compositor_reset_dirty_area;
-   compositor->base.set_clear_color = vl_compositor_set_clear_color;
-   compositor->base.clear_layers = vl_compositor_clear_layers;
-   compositor->base.set_buffer_layer = vl_compositor_set_buffer_layer;
-   compositor->base.set_palette_layer = vl_compositor_set_palette_layer;
-   compositor->base.set_rgba_layer = vl_compositor_set_rgba_layer;
-   compositor->base.render_picture = vl_compositor_render;
+   c->pipe = pipe;
 
-   compositor->pipe = pipe;
-
-   if (!init_pipe_state(compositor))
+   if (!init_pipe_state(c))
       return false;
 
-   if (!init_shaders(compositor)) {
-      cleanup_pipe_state(compositor);
+   if (!init_shaders(c)) {
+      cleanup_pipe_state(c);
       return false;
    }
-   if (!init_buffers(compositor)) {
-      cleanup_shaders(compositor);
-      cleanup_pipe_state(compositor);
+   if (!init_buffers(c)) {
+      cleanup_shaders(c);
+      cleanup_pipe_state(c);
       return false;
    }
 
-   vl_compositor_clear_layers(&compositor->base);
+   vl_compositor_clear_layers(c);
 
    vl_csc_get_matrix(VL_CSC_COLOR_STANDARD_IDENTITY, NULL, true, csc_matrix);
-   vl_compositor_set_csc_matrix(&compositor->base, csc_matrix);
+   vl_compositor_set_csc_matrix(c, csc_matrix);
 
-   compositor->clear_color[0] = compositor->clear_color[1] = 0.0f;
-   compositor->clear_color[2] = compositor->clear_color[3] = 0.0f;
-   vl_compositor_reset_dirty_area(&compositor->base);
+   c->clear_color[0] = c->clear_color[1] = 0.0f;
+   c->clear_color[2] = c->clear_color[3] = 0.0f;
+   vl_compositor_reset_dirty_area(c);
 
-   return &compositor->base;
+   return true;
 }
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 33d2a20733d..97601897b66 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -36,6 +36,10 @@
 
 struct pipe_context;
 
+/**
+ * composition and display of image data
+ */
+
 #define VL_COMPOSITOR_MAX_LAYERS 16
 
 struct vl_compositor_layer
@@ -53,7 +57,6 @@ struct vl_compositor_layer
 
 struct vl_compositor
 {
-   struct pipe_video_compositor base;
    struct pipe_context *pipe;
 
    struct pipe_framebuffer_state fb_state;
@@ -79,7 +82,88 @@ struct vl_compositor
    struct vl_compositor_layer layers[VL_COMPOSITOR_MAX_LAYERS];
 };
 
-struct pipe_video_compositor *vl_compositor_init(struct pipe_video_context *vpipe,
-                                                 struct pipe_context *pipe);
+/**
+ * initialize this compositor
+ */
+bool
+vl_compositor_init(struct vl_compositor *compositor, struct pipe_context *pipe);
+
+/**
+ * set yuv -> rgba conversion matrix
+ */
+void
+vl_compositor_set_csc_matrix(struct vl_compositor *compositor, const float mat[16]);
+
+/**
+ * reset the dirty area, so it's cleared with the clear color
+ */
+void
+vl_compositor_reset_dirty_area(struct vl_compositor *compositor);
+
+/**
+ * set the clear color
+ */
+void
+vl_compositor_set_clear_color(struct vl_compositor *compositor, float color[4]);
+
+/**
+ * set overlay samplers
+ */
+/*@{*/
+
+/**
+ * reset all currently set layers
+ */
+void
+vl_compositor_clear_layers(struct vl_compositor *compositor);
+
+/**
+ * set a video buffer as a layer to render
+ */
+void
+vl_compositor_set_buffer_layer(struct vl_compositor *compositor,
+                               unsigned layer,
+                               struct pipe_video_buffer *buffer,
+                               struct pipe_video_rect *src_rect,
+                               struct pipe_video_rect *dst_rect);
+
+/**
+ * set a paletted sampler as a layer to render
+ */
+void
+vl_compositor_set_palette_layer(struct vl_compositor *compositor,
+                                unsigned layer,
+                                struct pipe_sampler_view *indexes,
+                                struct pipe_sampler_view *palette,
+                                struct pipe_video_rect *src_rect,
+                                struct pipe_video_rect *dst_rect);
+
+/**
+ * set a rgba sampler as a layer to render
+ */
+void
+vl_compositor_set_rgba_layer(struct vl_compositor *compositor,
+                             unsigned layer,
+                             struct pipe_sampler_view *rgba,
+                             struct pipe_video_rect *src_rect,
+                             struct pipe_video_rect *dst_rect);
+
+/*@}*/
+
+/**
+ * render the layers to the frontbuffer
+ */
+void
+vl_compositor_render(struct vl_compositor          *compositor,
+                     enum pipe_mpeg12_picture_type picture_type,
+                     struct pipe_surface           *dst_surface,
+                     struct pipe_video_rect        *dst_area,
+                     struct pipe_fence_handle      **fence);
+
+/**
+ * destroy this compositor
+ */
+void
+vl_compositor_cleanup(struct vl_compositor *compositor);
 
 #endif /* vl_compositor_h */
diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index 1ac0d9c3050..b685e91d23e 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -114,16 +114,6 @@ vl_context_create_buffer(struct pipe_video_context *context,
    return result;
 }
 
-static struct pipe_video_compositor *
-vl_context_create_compositor(struct pipe_video_context *context)
-{
-   struct vl_context *ctx = (struct vl_context*)context;
-
-   assert(context);
-
-   return vl_compositor_init(context, ctx->pipe);
-}
-
 struct pipe_video_context *
 vl_create_context(struct pipe_context *pipe)
 {
@@ -139,7 +129,6 @@ vl_create_context(struct pipe_context *pipe)
    ctx->base.destroy = vl_context_destroy;
    ctx->base.create_decoder = vl_context_create_decoder;
    ctx->base.create_buffer = vl_context_create_buffer;
-   ctx->base.create_compositor = vl_context_create_compositor;
 
    ctx->pipe = pipe;
 
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index dfc383055a7..7ea67d3d198 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -71,11 +71,6 @@ struct pipe_video_context
                                               enum pipe_format buffer_format,
                                               enum pipe_video_chroma_format chroma_format,
                                               unsigned width, unsigned height);
-
-   /**
-    * Creates a video compositor
-    */
-   struct pipe_video_compositor *(*create_compositor)(struct pipe_video_context *context);
 };
 
 /**
@@ -201,84 +196,6 @@ struct pipe_video_buffer
    struct pipe_surface **(*get_surfaces)(struct pipe_video_buffer *buffer);
 };
 
-/**
- * composing and displaying of image data
- */
-struct pipe_video_compositor
-{
-   struct pipe_video_context *context;
-
-   /**
-    * destroy this compositor
-    */
-   void (*destroy)(struct pipe_video_compositor *compositor);
-
-   /**
-    * set yuv -> rgba conversion matrix
-    */
-   void (*set_csc_matrix)(struct pipe_video_compositor *compositor, const float mat[16]);
-
-   /**
-    * reset dirty area, so it's cleared with the clear colour
-    */
-   void (*reset_dirty_area)(struct pipe_video_compositor *compositor);
-
-   /**
-    * set the clear color
-    */
-   void (*set_clear_color)(struct pipe_video_compositor *compositor, float color[4]);
-
-   /**
-    * set overlay samplers
-    */
-   /*@{*/
-
-   /**
-    * reset all currently set layers
-    */
-   void (*clear_layers)(struct pipe_video_compositor *compositor);
-
-   /**
-    * set a video buffer as a layer to render
-    */
-   void (*set_buffer_layer)(struct pipe_video_compositor *compositor,
-                            unsigned layer,
-                            struct pipe_video_buffer *buffer,
-                            struct pipe_video_rect *src_rect,
-                            struct pipe_video_rect *dst_rect);
-
-   /**
-    * set a paletted sampler as a layer to render
-    */
-   void (*set_palette_layer)(struct pipe_video_compositor *compositor,
-                             unsigned layer,
-                             struct pipe_sampler_view *indexes,
-                             struct pipe_sampler_view *palette,
-                             struct pipe_video_rect *src_rect,
-                             struct pipe_video_rect *dst_rect);
-
-   /**
-    * set a rgba sampler as a layer to render
-    */
-   void (*set_rgba_layer)(struct pipe_video_compositor *compositor,
-                          unsigned layer,
-                          struct pipe_sampler_view *rgba,
-                          struct pipe_video_rect *src_rect,
-                          struct pipe_video_rect *dst_rect);
-
-   /*@}*/
-
-   /**
-    * render the layers to the frontbuffer
-    */
-   void (*render_picture)(struct pipe_video_compositor  *compositor,
-                          enum pipe_mpeg12_picture_type picture_type,
-                          struct pipe_surface           *dst_surface,
-                          struct pipe_video_rect        *dst_area,
-                          struct pipe_fence_handle      **fence);
-
-};
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index 85f4e1541ab..ea6d50d7457 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -61,7 +61,7 @@ vlVdpVideoMixerCreate(VdpDevice device,
       return VDP_STATUS_RESOURCES;
 
    vmixer->device = dev;
-   vmixer->compositor = context->create_compositor(context);
+   vl_compositor_init(&vmixer->compositor, dev->context->pipe);
 
    vl_csc_get_matrix
    (
@@ -69,7 +69,7 @@ vlVdpVideoMixerCreate(VdpDevice device,
       VL_CSC_COLOR_STANDARD_IDENTITY : VL_CSC_COLOR_STANDARD_BT_601,
       NULL, true, csc
    );
-   vmixer->compositor->set_csc_matrix(vmixer->compositor, csc);
+   vl_compositor_set_csc_matrix(&vmixer->compositor, csc);
 
    /*
     * TODO: Handle features and parameters
@@ -97,7 +97,7 @@ vlVdpVideoMixerDestroy(VdpVideoMixer mixer)
    if (!vmixer)
       return VDP_STATUS_INVALID_HANDLE;
 
-   vmixer->compositor->destroy(vmixer->compositor);
+   vl_compositor_cleanup(&vmixer->compositor);
 
    FREE(vmixer);
 
@@ -158,10 +158,10 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
    if (!dst)
       return VDP_STATUS_INVALID_HANDLE;
 
-   vmixer->compositor->clear_layers(vmixer->compositor);
-   vmixer->compositor->set_buffer_layer(vmixer->compositor, 0, surf->video_buffer, NULL, NULL);
-   vmixer->compositor->render_picture(vmixer->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
-                                      dst->surface, NULL, NULL);
+   vl_compositor_clear_layers(&vmixer->compositor);
+   vl_compositor_set_buffer_layer(&vmixer->compositor, 0, surf->video_buffer, NULL, NULL);
+   vl_compositor_render(&vmixer->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
+                        dst->surface, NULL, NULL);
 
    return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 02fcfbd0746..0f87ca78972 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -67,8 +67,8 @@ vlVdpPresentationQueueCreate(VdpDevice device,
 
    pq->device = dev;
    pq->drawable = pqt->drawable;
-   pq->compositor = context->create_compositor(context);
-   if (!pq->compositor) {
+   
+   if (!vl_compositor_init(&pq->compositor, dev->context->pipe)) {
       ret = VDP_STATUS_ERROR;
       goto no_compositor;
    }
@@ -97,7 +97,7 @@ vlVdpPresentationQueueDestroy(VdpPresentationQueue presentation_queue)
    if (!pq)
       return VDP_STATUS_INVALID_HANDLE;
 
-   pq->compositor->destroy(pq->compositor);
+   vl_compositor_cleanup(&pq->compositor);
 
    vlRemoveDataHTAB(presentation_queue);
    FREE(pq);
@@ -120,7 +120,7 @@ vlVdpPresentationQueueSetBackgroundColor(VdpPresentationQueue presentation_queue
    if (!pq)
       return VDP_STATUS_INVALID_HANDLE;
 
-   pq->compositor->set_clear_color(pq->compositor, (float*)background_color);
+   vl_compositor_set_clear_color(&pq->compositor, (float*)background_color);
 
    return VDP_STATUS_OK;
 }
@@ -170,10 +170,10 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
    if (!surf)
       return VDP_STATUS_INVALID_HANDLE;
 
-   pq->compositor->clear_layers(pq->compositor);
-   pq->compositor->set_rgba_layer(pq->compositor, 0, surf->sampler_view, NULL, NULL);
-   pq->compositor->render_picture(pq->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
-                                  drawable_surface, NULL, NULL);
+   vl_compositor_clear_layers(&pq->compositor);
+   vl_compositor_set_rgba_layer(&pq->compositor, 0, surf->sampler_view, NULL, NULL);
+   vl_compositor_render(&pq->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
+                        drawable_surface, NULL, NULL);
 
    pq->device->context->vpipe->screen->flush_frontbuffer
    (
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 1d6ca39fe39..ada17dfadc9 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -37,6 +37,7 @@
 #include <pipe/p_video_context.h>
 
 #include <util/u_debug.h>
+#include <vl/vl_compositor.h>
 
 #include <vl_winsys.h>
 
@@ -188,13 +189,13 @@ typedef struct
 {
    vlVdpDevice *device;
    Drawable drawable;
-   struct pipe_video_compositor *compositor;
+   struct vl_compositor compositor;
 } vlVdpPresentationQueue;
 
 typedef struct
 {
    vlVdpDevice *device;
-   struct pipe_video_compositor *compositor;
+   struct vl_compositor compositor;
 } vlVdpVideoMixer;
 
 typedef struct
diff --git a/src/gallium/state_trackers/xorg/xvmc/attributes.c b/src/gallium/state_trackers/xorg/xvmc/attributes.c
index 06d5dc919b4..817af531a32 100644
--- a/src/gallium/state_trackers/xorg/xvmc/attributes.c
+++ b/src/gallium/state_trackers/xorg/xvmc/attributes.c
@@ -113,7 +113,7 @@ Status XvMCSetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int
       context_priv->color_standard,
       &context_priv->procamp, true, csc
    );
-   context_priv->compositor->set_csc_matrix(context_priv->compositor, csc);
+   vl_compositor_set_csc_matrix(&context_priv->compositor, csc);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Set attribute %s to value %d.\n", attr, value);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index fbfa1afe44c..7b74825b37e 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -260,8 +260,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       return BadAlloc;
    }
 
-   context_priv->compositor = vctx->vpipe->create_compositor(vctx->vpipe);
-   if (!context_priv->compositor) {
+   if (!vl_compositor_init(&context_priv->compositor, vctx->pipe)) {
       XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL compositor.\n");
       context_priv->decoder->destroy(context_priv->decoder);
       vl_video_destroy(vctx);
@@ -280,7 +279,7 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       context_priv->color_standard,
       &context_priv->procamp, true, csc
    );
-   context_priv->compositor->set_csc_matrix(context_priv->compositor, csc);
+   vl_compositor_set_csc_matrix(&context_priv->compositor, csc);
 
    context_priv->vctx = vctx;
    context_priv->subpicture_max_width = subpic_max_w;
@@ -320,7 +319,7 @@ Status XvMCDestroyContext(Display *dpy, XvMCContext *context)
    vscreen = vctx->vscreen;
    pipe_surface_reference(&context_priv->drawable_surface, NULL);
    context_priv->decoder->destroy(context_priv->decoder);
-   context_priv->compositor->destroy(context_priv->compositor);
+   vl_compositor_cleanup(&context_priv->compositor);
    vl_video_destroy(vctx);
    vl_screen_destroy(vscreen);
    FREE(context_priv);
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 3db17d1ac51..292610be631 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -493,7 +493,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    static int dump_window = -1;
 
    struct pipe_video_context *vpipe;
-   struct pipe_video_compositor *compositor;
+   struct vl_compositor *compositor;
 
    XvMCSurfacePrivate *surface_priv;
    XvMCContextPrivate *context_priv;
@@ -519,7 +519,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 
    subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;
    vpipe = context_priv->vctx->vpipe;
-   compositor = context_priv->compositor;
+   compositor = &context_priv->compositor;
 
    if (!context_priv->drawable_surface ||
        context_priv->dst_rect.x != dst_rect.x || context_priv->dst_rect.y != dst_rect.y ||
@@ -527,7 +527,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 
       context_priv->drawable_surface = vl_drawable_surface_get(context_priv->vctx, drawable);
       context_priv->dst_rect = dst_rect;
-      compositor->reset_dirty_area(compositor);
+      vl_compositor_reset_dirty_area(compositor);
    }
 
    if (!context_priv->drawable_surface)
@@ -547,8 +547,8 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 
    unmap_and_flush_surface(surface_priv);
 
-   compositor->clear_layers(compositor);
-   compositor->set_buffer_layer(compositor, 0, surface_priv->video_buffer, &src_rect, NULL);
+   vl_compositor_clear_layers(compositor);
+   vl_compositor_set_buffer_layer(compositor, 0, surface_priv->video_buffer, &src_rect, NULL);
 
    if (subpicture_priv) {
       XVMC_MSG(XVMC_TRACE, "[XvMC] Surface %p has subpicture %p.\n", surface, surface_priv->subpicture);
@@ -556,11 +556,11 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
       assert(subpicture_priv->surface == surface);
 
       if (subpicture_priv->palette)
-         compositor->set_palette_layer(compositor, 1, subpicture_priv->sampler, subpicture_priv->palette,
-                                       &subpicture_priv->src_rect, &subpicture_priv->dst_rect);
+         vl_compositor_set_palette_layer(compositor, 1, subpicture_priv->sampler, subpicture_priv->palette,
+                                         &subpicture_priv->src_rect, &subpicture_priv->dst_rect);
       else
-         compositor->set_rgba_layer(compositor, 1, subpicture_priv->sampler,
-                                    &subpicture_priv->src_rect, &subpicture_priv->dst_rect);
+         vl_compositor_set_rgba_layer(compositor, 1, subpicture_priv->sampler,
+                                      &subpicture_priv->src_rect, &subpicture_priv->dst_rect);
 
       surface_priv->subpicture = NULL;
       subpicture_priv->surface = NULL;
@@ -569,7 +569,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    // Workaround for r600g, there seems to be a bug in the fence refcounting code
    vpipe->screen->fence_reference(vpipe->screen, &surface_priv->fence, NULL);
 
-   compositor->render_picture(compositor, PictureToPipe(flags), context_priv->drawable_surface, &dst_rect, &surface_priv->fence);
+   vl_compositor_render(compositor, PictureToPipe(flags), context_priv->drawable_surface, &dst_rect, &surface_priv->fence);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
 
diff --git a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
index 8d26b196fdc..5f8d9d13cb3 100644
--- a/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
+++ b/src/gallium/state_trackers/xorg/xvmc/xvmc_private.h
@@ -37,6 +37,7 @@
 #include <util/u_math.h>
 
 #include <vl/vl_csc.h>
+#include <vl/vl_compositor.h>
 
 #define BLOCK_SIZE_SAMPLES 64
 #define BLOCK_SIZE_BYTES (BLOCK_SIZE_SAMPLES * 2)
@@ -44,7 +45,6 @@
 struct vl_context;
 
 struct pipe_video_decoder;
-struct pipe_video_compositor;
 struct pipe_video_decode_buffer;
 struct pipe_video_buffer;
 
@@ -55,10 +55,10 @@ typedef struct
 {
    struct vl_context *vctx;
    struct pipe_video_decoder *decoder;
-   struct pipe_video_compositor *compositor;
 
    enum VL_CSC_COLOR_STANDARD color_standard;
    struct vl_procamp procamp;
+   struct vl_compositor compositor;
 
    unsigned short subpicture_max_width;
    unsigned short subpicture_max_height;
-- 
cgit v1.2.3


From 3bb33c911b895819fde5e179b2466c08f88164cf Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Jul 2011 15:07:39 +0200
Subject: [g3dvl] remove the unused priv member from pipe_video_context

---
 src/gallium/drivers/r300/r300_screen.c     | 2 +-
 src/gallium/drivers/r600/r600_pipe.c       | 2 +-
 src/gallium/drivers/softpipe/sp_screen.c   | 2 +-
 src/gallium/include/pipe/p_screen.h        | 3 +--
 src/gallium/include/pipe/p_video_context.h | 2 --
 src/gallium/winsys/g3dvl/dri/dri_winsys.c  | 5 +----
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c | 3 +--
 7 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 854fc39e014..141df11ef91 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -426,7 +426,7 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
 }
 
 static struct pipe_video_context *
-r300_video_create(struct pipe_screen *screen, struct pipe_context *pipe, void *priv)
+r300_video_create(struct pipe_screen *screen, struct pipe_context *pipe)
 {
    assert(screen);
 
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 9e0b0ea3e49..b3264c5a670 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -302,7 +302,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 }
 
 static struct pipe_video_context *
-r600_video_create(struct pipe_screen *screen, struct pipe_context *pipe, void *priv)
+r600_video_create(struct pipe_screen *screen, struct pipe_context *pipe)
 {
 	assert(screen && pipe);
 
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 2a5485209d1..98147cfd3c8 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -301,7 +301,7 @@ softpipe_flush_frontbuffer(struct pipe_screen *_screen,
 }
 
 static struct pipe_video_context *
-sp_video_create(struct pipe_screen *screen, struct pipe_context *context, void *priv)
+sp_video_create(struct pipe_screen *screen, struct pipe_context *context)
 {
    assert(screen);
 
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 32869bb71e8..28209346c78 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -101,8 +101,7 @@ struct pipe_screen {
    struct pipe_context * (*context_create)( struct pipe_screen *, void *priv );
 
    struct pipe_video_context * (*video_context_create)( struct pipe_screen *screen,
-                                                        struct pipe_context *context,
-                                                        void *priv );
+                                                        struct pipe_context *context );
 
    /**
     * Check if the given pipe_format is supported as a texture or
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index 7ea67d3d198..aa42e8e0884 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -47,8 +47,6 @@ struct pipe_video_context
 {
    struct pipe_screen *screen;
 
-   void *priv; /**< context private data (for DRI for example) */
-
    /**
     * destroy context, all objects created from this context
     * (buffers, decoders, compositors etc...) must be freed before calling this
diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index 29b7d3ea468..1d066f826db 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -256,14 +256,11 @@ vl_video_create(struct vl_screen *vscreen)
       goto no_pipe;
    }
 
-   vl_dri_ctx->base.vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen,
-                                                                   vl_dri_ctx->base.pipe,
-                                                                   vl_dri_ctx);
+   vl_dri_ctx->base.vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen, vl_dri_ctx->base.pipe);
 
    if (!vl_dri_ctx->base.vpipe)
       goto no_pipe;
 
-   vl_dri_ctx->base.vpipe->priv = vl_dri_ctx;
    vl_dri_ctx->base.vscreen = vscreen;
    vl_dri_ctx->fd = vl_dri_scrn->dri_screen->fd;
 
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 5cea6196cf7..0487bd9c560 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -183,7 +183,7 @@ vl_video_create(struct vl_screen *vscreen)
    if (!pipe)
       return NULL;
 
-   vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen, pipe, NULL);
+   vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen, pipe);
    if (!vpipe) {
       pipe->destroy(pipe);
       return NULL;
@@ -196,7 +196,6 @@ vl_video_create(struct vl_screen *vscreen)
       return NULL;
    }
 
-   vpipe->priv = vctx;
    vctx->vpipe = vpipe;
    vctx->vscreen = vscreen;
 
-- 
cgit v1.2.3


From 4e837f557bf5f5afb286e1f2244ed69c0092c2d6 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Jul 2011 16:56:11 +0200
Subject: [g3dvl] move video buffer creation out of video context

---
 src/gallium/auxiliary/vl/vl_context.c          | 41 ------------------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   | 45 +++++++++++--------
 src/gallium/auxiliary/vl/vl_video_buffer.c     | 60 +++++++++++++++++++++-----
 src/gallium/auxiliary/vl/vl_video_buffer.h     | 24 +++++++----
 src/gallium/drivers/r300/r300_context.c        |  3 ++
 src/gallium/drivers/r600/r600_pipe.c           |  1 +
 src/gallium/drivers/softpipe/sp_context.c      |  3 ++
 src/gallium/include/pipe/p_context.h           | 11 +++++
 src/gallium/include/pipe/p_video_context.h     |  9 +---
 src/gallium/state_trackers/vdpau/surface.c     |  4 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c | 12 ++++--
 11 files changed, 120 insertions(+), 93 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
index b685e91d23e..fec227dc01f 100644
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ b/src/gallium/auxiliary/vl/vl_context.c
@@ -74,46 +74,6 @@ vl_context_create_decoder(struct pipe_video_context *context,
    return NULL;
 }
 
-static struct pipe_video_buffer *
-vl_context_create_buffer(struct pipe_video_context *context,
-                         enum pipe_format buffer_format,
-                         enum pipe_video_chroma_format chroma_format,
-                         unsigned width, unsigned height)
-{
-   struct vl_context *ctx = (struct vl_context*)context;
-   const enum pipe_format *resource_formats;
-   struct pipe_video_buffer *result;
-   unsigned buffer_width, buffer_height;
-   bool pot_buffers;
-
-   assert(context);
-   assert(width > 0 && height > 0);
-
-   pot_buffers = !ctx->base.screen->get_video_param
-   (
-      ctx->base.screen,
-      PIPE_VIDEO_PROFILE_UNKNOWN,
-      PIPE_VIDEO_CAP_NPOT_TEXTURES
-   );
-
-   resource_formats = vl_video_buffer_formats(ctx->pipe->screen, buffer_format);
-   if (!resource_formats)
-      return NULL;
-
-   buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
-   buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
-
-   result = vl_video_buffer_init(context, ctx->pipe,
-                                 buffer_width, buffer_height, 1,
-                                 chroma_format,
-                                 resource_formats,
-                                 PIPE_USAGE_STATIC);
-   if (result) // TODO move format handling into vl_video_buffer
-      result->buffer_format = buffer_format;
-
-   return result;
-}
-
 struct pipe_video_context *
 vl_create_context(struct pipe_context *pipe)
 {
@@ -128,7 +88,6 @@ vl_create_context(struct pipe_context *pipe)
 
    ctx->base.destroy = vl_context_destroy;
    ctx->base.create_decoder = vl_context_create_decoder;
-   ctx->base.create_buffer = vl_context_create_buffer;
 
    ctx->pipe = pipe;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index eacb49e83c0..c2ddd2cb2ce 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -92,11 +92,14 @@ init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
    dec = (struct vl_mpeg12_decoder*)buffer->base.decoder;
 
    formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
-   buffer->zscan_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                               dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
-                                               align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line,
-                                               1, PIPE_VIDEO_CHROMA_FORMAT_444,
-                                               formats, PIPE_USAGE_STATIC);
+   buffer->zscan_source = vl_video_buffer_create_ex
+   (
+      dec->pipe,
+      dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
+      align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line,
+      1, PIPE_VIDEO_CHROMA_FORMAT_444, formats, PIPE_USAGE_STATIC
+   );
+
    if (!buffer->zscan_source)
       goto error_source;
 
@@ -718,19 +721,22 @@ init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_conf
       nr_of_idct_render_targets = 1;
 
    formats[0] = formats[1] = formats[2] = format_config->idct_source_format;
-   dec->idct_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                           dec->base.width / 4, dec->base.height, 1,
-                                           dec->base.chroma_format,
-                                           formats, PIPE_USAGE_STATIC);
+   dec->idct_source = vl_video_buffer_create_ex
+   (
+      dec->pipe, dec->base.width / 4, dec->base.height, 1,
+      dec->base.chroma_format, formats, PIPE_USAGE_STATIC
+   );
+
    if (!dec->idct_source)
       goto error_idct_source;
 
    formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
-   dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                         dec->base.width / nr_of_idct_render_targets,
-                                         dec->base.height / 4, nr_of_idct_render_targets,
-                                         dec->base.chroma_format,
-                                         formats, PIPE_USAGE_STATIC);
+   dec->mc_source = vl_video_buffer_create_ex
+   (
+      dec->pipe, dec->base.width / nr_of_idct_render_targets,
+      dec->base.height / 4, nr_of_idct_render_targets,
+      dec->base.chroma_format, formats, PIPE_USAGE_STATIC
+   );
 
    if (!dec->mc_source)
       goto error_mc_source;
@@ -772,11 +778,12 @@ init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, const struct format_
    enum pipe_format formats[3];
 
    formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
-   dec->mc_source = vl_video_buffer_init(dec->base.context, dec->pipe,
-                                         dec->base.width, dec->base.height, 1,
-                                         dec->base.chroma_format,
-                                         formats, PIPE_USAGE_STATIC);
-
+   dec->mc_source = vl_video_buffer_create_ex
+   (
+      dec->pipe, dec->base.width, dec->base.height, 1,
+      dec->base.chroma_format, formats, PIPE_USAGE_STATIC
+   );
+      
    return dec->mc_source != NULL;
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index 9b7bab47484..49b7b50cfee 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -114,7 +114,7 @@ vl_video_buffer_sampler_view_planes(struct pipe_video_buffer *buffer)
 
    assert(buf);
 
-   pipe = buf->pipe;
+   pipe = buf->base.context;
 
    for (i = 0; i < buf->num_planes; ++i ) {
       if (!buf->sampler_view_planes[i]) {
@@ -149,7 +149,7 @@ vl_video_buffer_sampler_view_components(struct pipe_video_buffer *buffer)
 
    assert(buf);
 
-   pipe = buf->pipe;
+   pipe = buf->base.context;
 
    for (component = 0, i = 0; i < buf->num_planes; ++i ) {
       unsigned nr_components = util_format_get_nr_components(buf->resources[i]->format);
@@ -188,7 +188,7 @@ vl_video_buffer_surfaces(struct pipe_video_buffer *buffer)
 
    assert(buf);
 
-   pipe = buf->pipe;
+   pipe = buf->base.context;
 
    for (i = 0; i < buf->num_planes; ++i ) {
       if (!buf->surfaces[i]) {
@@ -211,21 +211,60 @@ error:
 }
 
 struct pipe_video_buffer *
-vl_video_buffer_init(struct pipe_video_context *context,
-                     struct pipe_context *pipe,
-                     unsigned width, unsigned height, unsigned depth,
-                     enum pipe_video_chroma_format chroma_format,
-                     const enum pipe_format resource_formats[VL_MAX_PLANES],
-                     unsigned usage)
+vl_video_buffer_create(struct pipe_context *pipe,
+                       enum pipe_format buffer_format,
+                       enum pipe_video_chroma_format chroma_format,
+                       unsigned width, unsigned height)
+{
+   const enum pipe_format *resource_formats;
+   struct pipe_video_buffer *result;
+   unsigned buffer_width, buffer_height;
+   bool pot_buffers;
+
+   assert(pipe);
+   assert(width > 0 && height > 0);
+
+   pot_buffers = !pipe->screen->get_video_param
+   (
+      pipe->screen,
+      PIPE_VIDEO_PROFILE_UNKNOWN,
+      PIPE_VIDEO_CAP_NPOT_TEXTURES
+   );
+
+   resource_formats = vl_video_buffer_formats(pipe->screen, buffer_format);
+   if (!resource_formats)
+      return NULL;
+
+   buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
+   buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
+
+   result = vl_video_buffer_create_ex
+   (
+      pipe, buffer_width, buffer_height, 1,
+      chroma_format, resource_formats, PIPE_USAGE_STATIC
+   );
+   if (result)
+      result->buffer_format = buffer_format;
+
+   return result;
+}
+
+struct pipe_video_buffer *
+vl_video_buffer_create_ex(struct pipe_context *pipe,
+                          unsigned width, unsigned height, unsigned depth,
+                          enum pipe_video_chroma_format chroma_format,
+                          const enum pipe_format resource_formats[VL_MAX_PLANES],
+                          unsigned usage)
 {
    struct vl_video_buffer *buffer;
    struct pipe_resource templ;
    unsigned i;
 
-   assert(context && pipe);
+   assert(pipe);
 
    buffer = CALLOC_STRUCT(vl_video_buffer);
 
+   buffer->base.context = pipe;
    buffer->base.destroy = vl_video_buffer_destroy;
    buffer->base.get_sampler_view_planes = vl_video_buffer_sampler_view_planes;
    buffer->base.get_sampler_view_components = vl_video_buffer_sampler_view_components;
@@ -233,7 +272,6 @@ vl_video_buffer_init(struct pipe_video_context *context,
    buffer->base.chroma_format = chroma_format;
    buffer->base.width = width;
    buffer->base.height = height;
-   buffer->pipe = pipe;
    buffer->num_planes = 1;
 
    memset(&templ, 0, sizeof(templ));
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
index 8755c54dc73..172f332712b 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.h
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -41,7 +41,6 @@
 struct vl_video_buffer
 {
    struct pipe_video_buffer base;
-   struct pipe_context      *pipe;
    unsigned                 num_planes;
    struct pipe_resource     *resources[VL_MAX_PLANES];
    struct pipe_sampler_view *sampler_view_planes[VL_MAX_PLANES];
@@ -63,15 +62,24 @@ boolean
 vl_video_buffer_is_format_supported(struct pipe_screen *screen,
                                     enum pipe_format format,
                                     enum pipe_video_profile profile);
+                                    
+/**
+ * creates a video buffer, can be used as a standard implementation for pipe->create_video_buffer
+ */
+struct pipe_video_buffer *
+vl_video_buffer_create(struct pipe_context *pipe,
+                       enum pipe_format buffer_format,
+                       enum pipe_video_chroma_format chroma_format,
+                       unsigned width, unsigned height);
 
 /**
- * initialize a buffer, creating its resources
+ * extended create function, gets depth, usage and formats for each plane separately
  */
 struct pipe_video_buffer *
-vl_video_buffer_init(struct pipe_video_context *context,
-                     struct pipe_context *pipe,
-                     unsigned width, unsigned height, unsigned depth,
-                     enum pipe_video_chroma_format chroma_format,
-                     const enum pipe_format resource_formats[VL_MAX_PLANES],
-                     unsigned usage);
+vl_video_buffer_create_ex(struct pipe_context *pipe,
+                          unsigned width, unsigned height, unsigned depth,
+                          enum pipe_video_chroma_format chroma_format,
+                          const enum pipe_format resource_formats[VL_MAX_PLANES],
+                          unsigned usage);
+
 #endif /* vl_ycbcr_buffer_h */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 0554c40eef0..7d22ffb6a94 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -27,6 +27,7 @@
 #include "util/u_simple_list.h"
 #include "util/u_upload_mgr.h"
 #include "os/os_time.h"
+#include "vl/vl_video_buffer.h"
 
 #include "r300_cb.h"
 #include "r300_context.h"
@@ -436,6 +437,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300_init_query_functions(r300);
     r300_init_state_functions(r300);
     r300_init_resource_functions(r300);
+    
+    r300->context.create_video_buffer = vl_video_buffer_create;
 
     r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16,
                                        PIPE_BIND_VERTEX_BUFFER |
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index b3264c5a670..d8b51ea4871 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -226,6 +226,7 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 	r600_init_context_resource_functions(rctx);
 	r600_init_surface_functions(rctx);
 	rctx->context.draw_vbo = r600_draw_vbo;
+	rctx->context.create_video_buffer = vl_video_buffer_create;
 
 	switch (r600_get_family(rctx->radeon)) {
 	case CHIP_R600:
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index ce22f646228..0e623944e59 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -37,6 +37,7 @@
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "tgsi/tgsi_exec.h"
+#include "vl/vl_video_buffer.h"
 #include "sp_clear.h"
 #include "sp_context.h"
 #include "sp_flush.h"
@@ -258,6 +259,8 @@ softpipe_create_context( struct pipe_screen *screen,
    softpipe->pipe.flush = softpipe_flush_wrapped;
 
    softpipe->pipe.render_condition = softpipe_render_condition;
+   
+   softpipe->pipe.create_video_buffer = vl_video_buffer_create;
 
    /*
     * Alloc caches for accessing drawing surfaces and textures.
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index d8de3bac0ec..c02b060e4bc 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -59,6 +59,9 @@ struct pipe_vertex_buffer;
 struct pipe_vertex_element;
 struct pipe_viewport_state;
 
+enum pipe_video_chroma_format;
+enum pipe_format;
+
 /**
  * Gallium rendering context.  Basically:
  *  - state setting functions
@@ -395,6 +398,14 @@ struct pipe_context {
     * Flush any pending framebuffer writes and invalidate texture caches.
     */
    void (*texture_barrier)(struct pipe_context *);
+   
+   /**
+    * Creates a video buffer as decoding target
+    */
+   struct pipe_video_buffer *(*create_video_buffer)( struct pipe_context *context,
+                                                     enum pipe_format buffer_format,
+                                                     enum pipe_video_chroma_format chroma_format,
+                                                     unsigned width, unsigned height );
 };
 
 
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
index aa42e8e0884..0ac0c4bed0b 100644
--- a/src/gallium/include/pipe/p_video_context.h
+++ b/src/gallium/include/pipe/p_video_context.h
@@ -62,13 +62,6 @@ struct pipe_video_context
                                                 enum pipe_video_chroma_format chroma_format,
                                                 unsigned width, unsigned height);
 
-   /**
-    * Creates a buffer as decoding target
-    */
-   struct pipe_video_buffer *(*create_buffer)(struct pipe_video_context *context,
-                                              enum pipe_format buffer_format,
-                                              enum pipe_video_chroma_format chroma_format,
-                                              unsigned width, unsigned height);
 };
 
 /**
@@ -166,7 +159,7 @@ struct pipe_video_decode_buffer
  */
 struct pipe_video_buffer
 {
-   struct pipe_video_context *context;
+   struct pipe_context *context;
 
    enum pipe_format buffer_format;
    enum pipe_video_chroma_format chroma_format;
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index f20087f3fca..b8c4d2cd150 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -70,9 +70,9 @@ vlVdpVideoSurfaceCreate(VdpDevice device, VdpChromaType chroma_type,
    }
 
    p_surf->device = dev;
-   p_surf->video_buffer = dev->context->vpipe->create_buffer
+   p_surf->video_buffer = dev->context->pipe->create_video_buffer
    (
-      dev->context->vpipe,
+      dev->context->pipe,
       PIPE_FORMAT_YV12, // most common used
       ChromaToPipe(chroma_type),
       width, height
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 292610be631..0370a6e858f 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -303,6 +303,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    };
 
    XvMCContextPrivate *context_priv;
+   struct pipe_context *pipe;
    struct pipe_video_context *vpipe;
    XvMCSurfacePrivate *surface_priv;
 
@@ -316,6 +317,7 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
       return XvMCBadSurface;
 
    context_priv = context->privData;
+   pipe = context_priv->vctx->pipe;
    vpipe = context_priv->vctx->vpipe;
 
    surface_priv = CALLOC(1, sizeof(XvMCSurfacePrivate));
@@ -326,10 +328,12 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
    surface_priv->decode_buffer->set_quant_matrix(surface_priv->decode_buffer, dummy_quant, dummy_quant);
 
    surface_priv->mv_stride = surface_priv->decode_buffer->get_mv_stream_stride(surface_priv->decode_buffer);
-   surface_priv->video_buffer = vpipe->create_buffer(vpipe, PIPE_FORMAT_NV12,
-                                                     context_priv->decoder->chroma_format,
-                                                     context_priv->decoder->width,
-                                                     context_priv->decoder->height);
+   surface_priv->video_buffer = pipe->create_video_buffer
+   (
+      pipe, PIPE_FORMAT_NV12, context_priv->decoder->chroma_format,
+      context_priv->decoder->width, context_priv->decoder->height
+   );
+   
    surface_priv->context = context;
 
    surface->surface_id = XAllocID(dpy);
-- 
cgit v1.2.3


From ea78480029450c019287c2a94d7c42a6a1d12dc3 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Jul 2011 19:22:43 +0200
Subject: [g3dvl] and finally remove pipe_video_context

---
 src/gallium/auxiliary/Makefile                    |   2 +-
 src/gallium/auxiliary/vl/vl_compositor.h          |   2 +-
 src/gallium/auxiliary/vl/vl_context.c             |  95 -----------
 src/gallium/auxiliary/vl/vl_context.h             |  48 ------
 src/gallium/auxiliary/vl/vl_decoder.c             |  65 ++++++++
 src/gallium/auxiliary/vl/vl_decoder.h             |  44 +++++
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c      | 106 ++++++------
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.h      |  11 +-
 src/gallium/auxiliary/vl/vl_video_buffer.h        |   2 +-
 src/gallium/drivers/r300/r300_context.c           |   2 +
 src/gallium/drivers/r300/r300_screen.c            |  10 --
 src/gallium/drivers/r600/r600_pipe.c              |  13 +-
 src/gallium/drivers/softpipe/sp_context.c         |   2 +
 src/gallium/drivers/softpipe/sp_screen.c          |  10 --
 src/gallium/include/pipe/p_context.h              |  11 ++
 src/gallium/include/pipe/p_screen.h               |   3 -
 src/gallium/include/pipe/p_video_context.h        | 194 ----------------------
 src/gallium/include/pipe/p_video_decoder.h        | 170 +++++++++++++++++++
 src/gallium/state_trackers/vdpau/decode.c         |  10 +-
 src/gallium/state_trackers/vdpau/device.c         |   1 -
 src/gallium/state_trackers/vdpau/mixer.c          |   3 -
 src/gallium/state_trackers/vdpau/output.c         |   6 +-
 src/gallium/state_trackers/vdpau/presentation.c   |   7 +-
 src/gallium/state_trackers/vdpau/surface.c        |   5 +-
 src/gallium/state_trackers/vdpau/vdpau_private.h  |   2 +-
 src/gallium/state_trackers/xorg/xvmc/context.c    |  17 +-
 src/gallium/state_trackers/xorg/xvmc/subpicture.c |  14 +-
 src/gallium/state_trackers/xorg/xvmc/surface.c    |  25 +--
 src/gallium/winsys/g3dvl/dri/dri_winsys.c         |  18 +-
 src/gallium/winsys/g3dvl/vl_winsys.h              |   2 -
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c        |  13 +-
 31 files changed, 399 insertions(+), 514 deletions(-)
 delete mode 100644 src/gallium/auxiliary/vl/vl_context.c
 delete mode 100644 src/gallium/auxiliary/vl/vl_context.h
 create mode 100644 src/gallium/auxiliary/vl/vl_decoder.c
 create mode 100644 src/gallium/auxiliary/vl/vl_decoder.h
 delete mode 100644 src/gallium/include/pipe/p_video_context.h
 create mode 100644 src/gallium/include/pipe/p_video_decoder.h

diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile
index e37cf21416a..7dae7bc908b 100644
--- a/src/gallium/auxiliary/Makefile
+++ b/src/gallium/auxiliary/Makefile
@@ -148,9 +148,9 @@ C_SOURCES = \
 	util/u_resource.c \
 	util/u_upload_mgr.c \
 	util/u_vbuf_mgr.c \
-	vl/vl_context.c \
 	vl/vl_csc.c \
 	vl/vl_compositor.c \
+	vl/vl_decoder.c \
 	vl/vl_mpeg12_decoder.c \
 	vl/vl_mpeg12_bitstream.c \
 	vl/vl_zscan.c \
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 97601897b66..df662db4d91 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -29,7 +29,7 @@
 #define vl_compositor_h
 
 #include <pipe/p_state.h>
-#include <pipe/p_video_context.h>
+#include <pipe/p_video_decoder.h>
 #include <pipe/p_video_state.h>
 
 #include "vl_types.h"
diff --git a/src/gallium/auxiliary/vl/vl_context.c b/src/gallium/auxiliary/vl/vl_context.c
deleted file mode 100644
index fec227dc01f..00000000000
--- a/src/gallium/auxiliary/vl/vl_context.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include <pipe/p_video_context.h>
-
-#include <util/u_memory.h>
-#include <util/u_rect.h>
-#include <util/u_video.h>
-
-#include "vl_context.h"
-#include "vl_compositor.h"
-#include "vl_mpeg12_decoder.h"
-
-static void
-vl_context_destroy(struct pipe_video_context *context)
-{
-   struct vl_context *ctx = (struct vl_context*)context;
-
-   assert(context);
-
-   FREE(ctx);
-}
-
-static struct pipe_video_decoder *
-vl_context_create_decoder(struct pipe_video_context *context,
-                          enum pipe_video_profile profile,
-                          enum pipe_video_entrypoint entrypoint,
-                          enum pipe_video_chroma_format chroma_format,
-                          unsigned width, unsigned height)
-{
-   struct vl_context *ctx = (struct vl_context*)context;
-   unsigned buffer_width, buffer_height;
-   bool pot_buffers;
-
-   assert(context);
-   assert(width > 0 && height > 0);
-   
-   pot_buffers = !ctx->base.screen->get_video_param(ctx->base.screen, profile, PIPE_VIDEO_CAP_NPOT_TEXTURES);
-
-   buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
-   buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
-
-   switch (u_reduce_video_profile(profile)) {
-      case PIPE_VIDEO_CODEC_MPEG12:
-         return vl_create_mpeg12_decoder(context, ctx->pipe, profile, entrypoint,
-                                         chroma_format, buffer_width, buffer_height);
-      default:
-         return NULL;
-   }
-   return NULL;
-}
-
-struct pipe_video_context *
-vl_create_context(struct pipe_context *pipe)
-{
-   struct vl_context *ctx;
-
-   ctx = CALLOC_STRUCT(vl_context);
-
-   if (!ctx)
-      return NULL;
-
-   ctx->base.screen = pipe->screen;
-
-   ctx->base.destroy = vl_context_destroy;
-   ctx->base.create_decoder = vl_context_create_decoder;
-
-   ctx->pipe = pipe;
-
-   return &ctx->base;
-}
diff --git a/src/gallium/auxiliary/vl/vl_context.h b/src/gallium/auxiliary/vl/vl_context.h
deleted file mode 100644
index 4fbe2651d89..00000000000
--- a/src/gallium/auxiliary/vl/vl_context.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * Copyright 2011 Christian König.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef vl_context_h
-#define vl_context_h
-
-#include <pipe/p_video_context.h>
-
-struct pipe_screen;
-struct pipe_context;
-
-struct vl_context
-{
-   struct pipe_video_context base;
-   struct pipe_context *pipe;
-};
-
-/* drivers can call this function in their pipe_video_context constructors and pass it
-   an accelerated pipe_context along with suitable buffering modes, etc */
-struct pipe_video_context *
-vl_create_context(struct pipe_context *pipe);
-
-#endif /* vl_context_h */
diff --git a/src/gallium/auxiliary/vl/vl_decoder.c b/src/gallium/auxiliary/vl/vl_decoder.c
new file mode 100644
index 00000000000..2be5c17ed3e
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_decoder.c
@@ -0,0 +1,65 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <pipe/p_video_decoder.h>
+
+#include <util/u_video.h>
+
+#include "vl_decoder.h"
+#include "vl_mpeg12_decoder.h"
+
+struct pipe_video_decoder *
+vl_create_decoder(struct pipe_context *pipe,
+                  enum pipe_video_profile profile,
+                  enum pipe_video_entrypoint entrypoint,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height)
+{
+   unsigned buffer_width, buffer_height;
+   bool pot_buffers;
+   /* picks buffer_width/buffer_height for this screen, then creates the decoder for the profile's codec */
+   assert(pipe);
+   assert(width > 0 && height > 0);
+   /* power-of-two buffers are used when the screen reports 0 for PIPE_VIDEO_CAP_NPOT_TEXTURES */
+   pot_buffers = !pipe->screen->get_video_param
+   (
+      pipe->screen,
+      profile,
+      PIPE_VIDEO_CAP_NPOT_TEXTURES
+   );
+   /* otherwise the requested size is only aligned to MACROBLOCK_WIDTH / MACROBLOCK_HEIGHT */
+   buffer_width = pot_buffers ? util_next_power_of_two(width) : align(width, MACROBLOCK_WIDTH);
+   buffer_height = pot_buffers ? util_next_power_of_two(height) : align(height, MACROBLOCK_HEIGHT);
+   /* PIPE_VIDEO_CODEC_MPEG12 is the only codec handled; every other codec yields NULL */
+   switch (u_reduce_video_profile(profile)) {
+      case PIPE_VIDEO_CODEC_MPEG12:
+         return vl_create_mpeg12_decoder(pipe, profile, entrypoint, chroma_format, buffer_width, buffer_height);
+      default:
+         return NULL;
+   }
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/vl/vl_decoder.h b/src/gallium/auxiliary/vl/vl_decoder.h
new file mode 100644
index 00000000000..440f5ecfb04
--- /dev/null
+++ b/src/gallium/auxiliary/vl/vl_decoder.h
@@ -0,0 +1,44 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef vl_decoder_h
+#define vl_decoder_h
+
+#include <pipe/p_video_decoder.h>
+
+/**
+ * standard implementation of pipe->create_video_decoder; returns NULL for codecs other than PIPE_VIDEO_CODEC_MPEG12
+ */
+struct pipe_video_decoder *
+vl_create_decoder(struct pipe_context *pipe,
+                  enum pipe_video_profile profile,
+                  enum pipe_video_entrypoint entrypoint,
+                  enum pipe_video_chroma_format chroma_format,
+                  unsigned width, unsigned height);
+
+#endif /* vl_decoder_h */
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index c2ddd2cb2ce..b866e0e5aec 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -94,7 +94,7 @@ init_zscan_buffer(struct vl_mpeg12_buffer *buffer)
    formats[0] = formats[1] = formats[2] = dec->zscan_source_format;
    buffer->zscan_source = vl_video_buffer_create_ex
    (
-      dec->pipe,
+      dec->base.context,
       dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT,
       align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line,
       1, PIPE_VIDEO_CHROMA_FORMAT_444, formats, PIPE_USAGE_STATIC
@@ -277,7 +277,7 @@ vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer)
    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
    assert(dec);
 
-   vl_vb_map(&buf->vertex_stream, dec->pipe);
+   vl_vb_map(&buf->vertex_stream, dec->base.context);
 
    sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source);
 
@@ -293,14 +293,14 @@ vl_mpeg12_buffer_begin_frame(struct pipe_video_decode_buffer *buffer)
          1
       };
 
-      buf->tex_transfer[i] = dec->pipe->get_transfer
+      buf->tex_transfer[i] = dec->base.context->get_transfer
       (
-         dec->pipe, tex,
+         dec->base.context, tex,
          0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
          &rect
       );
 
-      buf->texels[i] = dec->pipe->transfer_map(dec->pipe, buf->tex_transfer[i]);
+      buf->texels[i] = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer[i]);
    }
 
    if (dec->base.entrypoint == PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
@@ -407,11 +407,11 @@ vl_mpeg12_buffer_end_frame(struct pipe_video_decode_buffer *buffer)
    dec = (struct vl_mpeg12_decoder *)buf->base.decoder;
    assert(dec);
 
-   vl_vb_unmap(&buf->vertex_stream, dec->pipe);
+   vl_vb_unmap(&buf->vertex_stream, dec->base.context);
 
    for (i = 0; i < VL_MAX_PLANES; ++i) {
-      dec->pipe->transfer_unmap(dec->pipe, buf->tex_transfer[i]);
-      dec->pipe->transfer_destroy(dec->pipe, buf->tex_transfer[i]);
+      dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer[i]);
+      dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer[i]);
    }
 }
 
@@ -423,11 +423,11 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    assert(decoder);
 
    /* Asserted in softpipe_delete_fs_state() for some reason */
-   dec->pipe->bind_vs_state(dec->pipe, NULL);
-   dec->pipe->bind_fs_state(dec->pipe, NULL);
+   dec->base.context->bind_vs_state(dec->base.context, NULL);
+   dec->base.context->bind_fs_state(dec->base.context, NULL);
 
-   dec->pipe->delete_depth_stencil_alpha_state(dec->pipe, dec->dsa);
-   dec->pipe->delete_sampler_state(dec->pipe, dec->sampler_ycbcr);
+   dec->base.context->delete_depth_stencil_alpha_state(dec->base.context, dec->dsa);
+   dec->base.context->delete_sampler_state(dec->base.context, dec->sampler_ycbcr);
 
    vl_mc_cleanup(&dec->mc_y);
    vl_mc_cleanup(&dec->mc_c);
@@ -442,8 +442,8 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
    vl_zscan_cleanup(&dec->zscan_y);
    vl_zscan_cleanup(&dec->zscan_c);
 
-   dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
-   dec->pipe->delete_vertex_elements_state(dec->pipe, dec->ves_mv);
+   dec->base.context->delete_vertex_elements_state(dec->base.context, dec->ves_ycbcr);
+   dec->base.context->delete_vertex_elements_state(dec->base.context, dec->ves_mv);
 
    pipe_resource_reference(&dec->quads.buffer, NULL);
    pipe_resource_reference(&dec->pos.buffer, NULL);
@@ -478,7 +478,7 @@ vl_mpeg12_create_buffer(struct pipe_video_decoder *decoder)
    buffer->base.decode_bitstream = vl_mpeg12_buffer_decode_bitstream;
    buffer->base.end_frame = vl_mpeg12_buffer_end_frame;
 
-   if (!vl_vb_init(&buffer->vertex_stream, dec->pipe,
+   if (!vl_vb_init(&buffer->vertex_stream, dec->base.context,
                    dec->base.width / MACROBLOCK_WIDTH,
                    dec->base.height / MACROBLOCK_HEIGHT))
       goto error_vertex_buffer;
@@ -545,7 +545,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
    surfaces = dst->get_surfaces(dst);
 
-   dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_mv);
+   dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_mv);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
       if (!surfaces[i]) continue;
 
@@ -555,7 +555,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
          if (!sv[j]) continue;
 
          vb[2] = vl_vb_get_mv(&buf->vertex_stream, j);;
-         dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
+         dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
 
          vl_mc_render_ref(&buf->mc[i], sv[j][i]);
       }
@@ -563,12 +563,12 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
 
    vb[2] = dec->block_num;
 
-   dec->pipe->bind_vertex_elements_state(dec->pipe, dec->ves_ycbcr);
+   dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_ycbcr);
    for (i = 0; i < VL_MAX_PLANES; ++i) {
       if (!num_ycbcr_blocks[i]) continue;
 
       vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i);
-      dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
+      dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
 
       vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]);
 
@@ -585,13 +585,13 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
          if (!num_ycbcr_blocks[i]) continue;
 
          vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component);
-         dec->pipe->set_vertex_buffers(dec->pipe, 3, vb);
+         dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
 
          if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
             vl_idct_prepare_stage2(component == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[component]);
          else {
-            dec->pipe->set_fragment_sampler_views(dec->pipe, 1, &mc_source_sv[component]);
-            dec->pipe->bind_fragment_sampler_states(dec->pipe, 1, &dec->sampler_ycbcr);
+            dec->base.context->set_fragment_sampler_views(dec->base.context, 1, &mc_source_sv[component]);
+            dec->base.context->bind_fragment_sampler_states(dec->base.context, 1, &dec->sampler_ycbcr);
          }
          vl_mc_render_ycbcr(&buf->mc[i], j, num_ycbcr_blocks[component]);
       }
@@ -623,8 +623,8 @@ init_pipe_state(struct vl_mpeg12_decoder *dec)
    dsa.alpha.enabled = 0;
    dsa.alpha.func = PIPE_FUNC_ALWAYS;
    dsa.alpha.ref_value = 0;
-   dec->dsa = dec->pipe->create_depth_stencil_alpha_state(dec->pipe, &dsa);
-   dec->pipe->bind_depth_stencil_alpha_state(dec->pipe, dec->dsa);
+   dec->dsa = dec->base.context->create_depth_stencil_alpha_state(dec->base.context, &dsa);
+   dec->base.context->bind_depth_stencil_alpha_state(dec->base.context, dec->dsa);
 
    memset(&sampler, 0, sizeof(sampler));
    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
@@ -636,7 +636,7 @@ init_pipe_state(struct vl_mpeg12_decoder *dec)
    sampler.compare_mode = PIPE_TEX_COMPARE_NONE;
    sampler.compare_func = PIPE_FUNC_ALWAYS;
    sampler.normalized_coords = 1;
-   dec->sampler_ycbcr = dec->pipe->create_sampler_state(dec->pipe, &sampler);
+   dec->sampler_ycbcr = dec->base.context->create_sampler_state(dec->base.context, &sampler);
    if (!dec->sampler_ycbcr)
       return false;
 
@@ -651,7 +651,7 @@ find_format_config(struct vl_mpeg12_decoder *dec, const struct format_config con
 
    assert(dec);
 
-   screen = dec->pipe->screen;
+   screen = dec->base.context->screen;
 
    for (i = 0; i < num_configs; ++i) {
       if (!screen->is_format_supported(screen, configs[i].zscan_source_format, PIPE_TEXTURE_2D,
@@ -685,17 +685,17 @@ init_zscan(struct vl_mpeg12_decoder *dec, const struct format_config* format_con
    assert(dec);
 
    dec->zscan_source_format = format_config->zscan_source_format;
-   dec->zscan_linear = vl_zscan_layout(dec->pipe, vl_zscan_linear, dec->blocks_per_line);
-   dec->zscan_normal = vl_zscan_layout(dec->pipe, vl_zscan_normal, dec->blocks_per_line);
-   dec->zscan_alternate = vl_zscan_layout(dec->pipe, vl_zscan_alternate, dec->blocks_per_line);
+   dec->zscan_linear = vl_zscan_layout(dec->base.context, vl_zscan_linear, dec->blocks_per_line);
+   dec->zscan_normal = vl_zscan_layout(dec->base.context, vl_zscan_normal, dec->blocks_per_line);
+   dec->zscan_alternate = vl_zscan_layout(dec->base.context, vl_zscan_alternate, dec->blocks_per_line);
 
    num_channels = dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT ? 4 : 1;
 
-   if (!vl_zscan_init(&dec->zscan_y, dec->pipe, dec->base.width, dec->base.height,
+   if (!vl_zscan_init(&dec->zscan_y, dec->base.context, dec->base.width, dec->base.height,
                       dec->blocks_per_line, dec->num_blocks, num_channels))
       return false;
 
-   if (!vl_zscan_init(&dec->zscan_c, dec->pipe, dec->chroma_width, dec->chroma_height,
+   if (!vl_zscan_init(&dec->zscan_c, dec->base.context, dec->chroma_width, dec->chroma_height,
                       dec->blocks_per_line, dec->num_blocks, num_channels))
       return false;
 
@@ -710,8 +710,15 @@ init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_conf
 
    struct pipe_sampler_view *matrix = NULL;
 
-   nr_of_idct_render_targets = dec->pipe->screen->get_param(dec->pipe->screen, PIPE_CAP_MAX_RENDER_TARGETS);
-   max_inst = dec->pipe->screen->get_shader_param(dec->pipe->screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_INSTRUCTIONS);
+   nr_of_idct_render_targets = dec->base.context->screen->get_param
+   (
+      dec->base.context->screen, PIPE_CAP_MAX_RENDER_TARGETS
+   );
+   
+   max_inst = dec->base.context->screen->get_shader_param
+   (
+      dec->base.context->screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_INSTRUCTIONS
+   );
 
    // Just assume we need 32 inst per render target, not 100% true, but should work in most cases
    if (nr_of_idct_render_targets >= 4 && max_inst >= 32*4)
@@ -723,7 +730,7 @@ init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_conf
    formats[0] = formats[1] = formats[2] = format_config->idct_source_format;
    dec->idct_source = vl_video_buffer_create_ex
    (
-      dec->pipe, dec->base.width / 4, dec->base.height, 1,
+      dec->base.context, dec->base.width / 4, dec->base.height, 1,
       dec->base.chroma_format, formats, PIPE_USAGE_STATIC
    );
 
@@ -733,7 +740,7 @@ init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_conf
    formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
    dec->mc_source = vl_video_buffer_create_ex
    (
-      dec->pipe, dec->base.width / nr_of_idct_render_targets,
+      dec->base.context, dec->base.width / nr_of_idct_render_targets,
       dec->base.height / 4, nr_of_idct_render_targets,
       dec->base.chroma_format, formats, PIPE_USAGE_STATIC
    );
@@ -741,14 +748,14 @@ init_idct(struct vl_mpeg12_decoder *dec, const struct format_config* format_conf
    if (!dec->mc_source)
       goto error_mc_source;
 
-   if (!(matrix = vl_idct_upload_matrix(dec->pipe, format_config->idct_scale)))
+   if (!(matrix = vl_idct_upload_matrix(dec->base.context, format_config->idct_scale)))
       goto error_matrix;
 
-   if (!vl_idct_init(&dec->idct_y, dec->pipe, dec->base.width, dec->base.height,
+   if (!vl_idct_init(&dec->idct_y, dec->base.context, dec->base.width, dec->base.height,
                      nr_of_idct_render_targets, matrix, matrix))
       goto error_y;
 
-   if(!vl_idct_init(&dec->idct_c, dec->pipe, dec->chroma_width, dec->chroma_height,
+   if(!vl_idct_init(&dec->idct_c, dec->base.context, dec->chroma_width, dec->chroma_height,
                     nr_of_idct_render_targets, matrix, matrix))
       goto error_c;
 
@@ -780,7 +787,7 @@ init_mc_source_widthout_idct(struct vl_mpeg12_decoder *dec, const struct format_
    formats[0] = formats[1] = formats[2] = format_config->mc_source_format;
    dec->mc_source = vl_video_buffer_create_ex
    (
-      dec->pipe, dec->base.width, dec->base.height, 1,
+      dec->base.context, dec->base.width, dec->base.height, 1,
       dec->base.chroma_format, formats, PIPE_USAGE_STATIC
    );
       
@@ -831,8 +838,7 @@ mc_frag_shader_callback(void *priv, struct vl_mc *mc,
 }
 
 struct pipe_video_decoder *
-vl_create_mpeg12_decoder(struct pipe_video_context *context,
-                         struct pipe_context *pipe,
+vl_create_mpeg12_decoder(struct pipe_context *context,
                          enum pipe_video_profile profile,
                          enum pipe_video_entrypoint entrypoint,
                          enum pipe_video_chroma_format chroma_format,
@@ -860,21 +866,19 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
    dec->base.create_buffer = vl_mpeg12_create_buffer;
    dec->base.flush_buffer = vl_mpeg12_decoder_flush_buffer;
 
-   dec->pipe = pipe;
-
    dec->blocks_per_line = MAX2(util_next_power_of_two(dec->base.width) / block_size_pixels, 4);
    dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels;
 
-   dec->quads = vl_vb_upload_quads(dec->pipe);
+   dec->quads = vl_vb_upload_quads(dec->base.context);
    dec->pos = vl_vb_upload_pos(
-      dec->pipe,
+      dec->base.context,
       dec->base.width / MACROBLOCK_WIDTH,
       dec->base.height / MACROBLOCK_HEIGHT
    );
-   dec->block_num = vl_vb_upload_block_num(dec->pipe, dec->num_blocks);
+   dec->block_num = vl_vb_upload_block_num(dec->base.context, dec->num_blocks);
 
-   dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->pipe);
-   dec->ves_mv = vl_vb_get_ves_mv(dec->pipe);
+   dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context);
+   dec->ves_mv = vl_vb_get_ves_mv(dec->base.context);
 
    /* TODO: Implement 422, 444 */
    assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
@@ -922,12 +926,14 @@ vl_create_mpeg12_decoder(struct pipe_video_context *context,
          goto error_sources;
    }
 
-   if (!vl_mc_init(&dec->mc_y, dec->pipe, dec->base.width, dec->base.height, MACROBLOCK_HEIGHT, format_config->mc_scale,
+   if (!vl_mc_init(&dec->mc_y, dec->base.context, dec->base.width, dec->base.height,
+                   MACROBLOCK_HEIGHT, format_config->mc_scale,
                    mc_vert_shader_callback, mc_frag_shader_callback, dec))
       goto error_mc_y;
 
    // TODO
-   if (!vl_mc_init(&dec->mc_c, dec->pipe, dec->base.width, dec->base.height, BLOCK_HEIGHT, format_config->mc_scale,
+   if (!vl_mc_init(&dec->mc_c, dec->base.context, dec->base.width, dec->base.height,
+                   BLOCK_HEIGHT, format_config->mc_scale,
                    mc_vert_shader_callback, mc_frag_shader_callback, dec))
       goto error_mc_c;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 474ae2d5d29..01265e368a3 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -28,7 +28,7 @@
 #ifndef vl_mpeg12_decoder_h
 #define vl_mpeg12_decoder_h
 
-#include <pipe/p_video_context.h>
+#include <pipe/p_video_decoder.h>
 
 #include "vl_mpeg12_bitstream.h"
 #include "vl_zscan.h"
@@ -44,7 +44,6 @@ struct pipe_context;
 struct vl_mpeg12_decoder
 {
    struct pipe_video_decoder base;
-   struct pipe_context *pipe;
 
    unsigned chroma_width, chroma_height;
 
@@ -93,11 +92,11 @@ struct vl_mpeg12_buffer
    short *texels[VL_MAX_PLANES];
 };
 
-/* drivers can call this function in their pipe_video_context constructors and pass it
-   an accelerated pipe_context along with suitable buffering modes, etc */
+/**
+ * creates a shader-based mpeg12 decoder
+ */
 struct pipe_video_decoder *
-vl_create_mpeg12_decoder(struct pipe_video_context *context,
-                         struct pipe_context *pipe,
+vl_create_mpeg12_decoder(struct pipe_context *pipe,
                          enum pipe_video_profile profile,
                          enum pipe_video_entrypoint entrypoint,
                          enum pipe_video_chroma_format chroma_format,
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
index 172f332712b..78aac3fa0f2 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.h
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -29,7 +29,7 @@
 #define vl_ycbcr_buffer_h
 
 #include <pipe/p_context.h>
-#include <pipe/p_video_context.h>
+#include <pipe/p_video_decoder.h>
 
 #include "vl_defines.h"
 
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 7d22ffb6a94..d94ac74f0e5 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -27,6 +27,7 @@
 #include "util/u_simple_list.h"
 #include "util/u_upload_mgr.h"
 #include "os/os_time.h"
+#include "vl/vl_decoder.h"
 #include "vl/vl_video_buffer.h"
 
 #include "r300_cb.h"
@@ -438,6 +439,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300_init_state_functions(r300);
     r300_init_resource_functions(r300);
     
+    r300->context.create_video_decoder = vl_create_decoder;
     r300->context.create_video_buffer = vl_video_buffer_create;
 
     r300->vbuf_mgr = u_vbuf_mgr_create(&r300->context, 1024 * 1024, 16,
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 141df11ef91..19b273f4f49 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -25,7 +25,6 @@
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
 #include "os/os_time.h"
-#include "vl/vl_context.h"
 #include "vl/vl_video_buffer.h"
 
 #include "r300_context.h"
@@ -425,14 +424,6 @@ static boolean r300_is_format_supported(struct pipe_screen* screen,
     return retval == usage;
 }
 
-static struct pipe_video_context *
-r300_video_create(struct pipe_screen *screen, struct pipe_context *pipe)
-{
-   assert(screen);
-
-   return vl_create_context(pipe);
-}
-
 static void r300_destroy_screen(struct pipe_screen* pscreen)
 {
     struct r300_screen* r300screen = r300_screen(pscreen);
@@ -533,7 +524,6 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws)
     r300screen->screen.is_format_supported = r300_is_format_supported;
     r300screen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
     r300screen->screen.context_create = r300_create_context;
-    r300screen->screen.video_context_create = r300_video_create;
     r300screen->screen.fence_reference = r300_fence_reference;
     r300screen->screen.fence_signalled = r300_fence_signalled;
     r300screen->screen.fence_finish = r300_fence_finish;
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index d8b51ea4871..76bb1883ede 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -38,7 +38,7 @@
 #include <util/u_memory.h>
 #include <util/u_inlines.h>
 #include "util/u_upload_mgr.h"
-#include <vl/vl_context.h>
+#include <vl/vl_decoder.h>
 #include <vl/vl_video_buffer.h>
 #include "os/os_time.h"
 #include <pipebuffer/pb_buffer.h>
@@ -226,6 +226,8 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 	r600_init_context_resource_functions(rctx);
 	r600_init_surface_functions(rctx);
 	rctx->context.draw_vbo = r600_draw_vbo;
+
+	rctx->context.create_video_decoder = vl_create_decoder;
 	rctx->context.create_video_buffer = vl_video_buffer_create;
 
 	switch (r600_get_family(rctx->radeon)) {
@@ -302,14 +304,6 @@ static struct pipe_context *r600_create_context(struct pipe_screen *screen, void
 	return &rctx->context;
 }
 
-static struct pipe_video_context *
-r600_video_create(struct pipe_screen *screen, struct pipe_context *pipe)
-{
-	assert(screen && pipe);
-
-	return vl_create_context(pipe);
-}
-
 /*
  * pipe_screen
  */
@@ -679,7 +673,6 @@ struct pipe_screen *r600_screen_create(struct radeon *radeon)
 	rscreen->screen.is_format_supported = r600_is_format_supported;
 	rscreen->screen.is_video_format_supported = vl_video_buffer_is_format_supported;
 	rscreen->screen.context_create = r600_create_context;
-	rscreen->screen.video_context_create = r600_video_create;
 	rscreen->screen.fence_reference = r600_fence_reference;
 	rscreen->screen.fence_signalled = r600_fence_signalled;
 	rscreen->screen.fence_finish = r600_fence_finish;
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 0e623944e59..2c43602ea1c 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -37,6 +37,7 @@
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "tgsi/tgsi_exec.h"
+#include "vl/vl_decoder.h"
 #include "vl/vl_video_buffer.h"
 #include "sp_clear.h"
 #include "sp_context.h"
@@ -260,6 +261,7 @@ softpipe_create_context( struct pipe_screen *screen,
 
    softpipe->pipe.render_condition = softpipe_render_condition;
    
+   softpipe->pipe.create_video_decoder = vl_create_decoder;
    softpipe->pipe.create_video_buffer = vl_video_buffer_create;
 
    /*
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 98147cfd3c8..f952e6046f0 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -33,7 +33,6 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 #include "draw/draw_context.h"
-#include "vl/vl_context.h"
 #include "vl/vl_video_buffer.h"
 
 #include "state_tracker/sw_winsys.h"
@@ -300,14 +299,6 @@ softpipe_flush_frontbuffer(struct pipe_screen *_screen,
       winsys->displaytarget_display(winsys, texture->dt, context_private);
 }
 
-static struct pipe_video_context *
-sp_video_create(struct pipe_screen *screen, struct pipe_context *context)
-{
-   assert(screen);
-
-   return vl_create_context(context);
-}
-
 /**
  * Create a new pipe_screen object
  * Note: we're not presently subclassing pipe_screen (no softpipe_screen).
@@ -335,7 +326,6 @@ softpipe_create_screen(struct sw_winsys *winsys)
    screen->base.is_video_format_supported = vl_video_buffer_is_format_supported;
    screen->base.context_create = softpipe_create_context;
    screen->base.flush_frontbuffer = softpipe_flush_frontbuffer;
-   screen->base.video_context_create = sp_video_create;
 
    util_format_s3tc_init();
 
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index c02b060e4bc..ac290495a43 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -59,6 +59,8 @@ struct pipe_vertex_buffer;
 struct pipe_vertex_element;
 struct pipe_viewport_state;
 
+enum pipe_video_profile;
+enum pipe_video_entrypoint;
 enum pipe_video_chroma_format;
 enum pipe_format;
 
@@ -399,6 +401,15 @@ struct pipe_context {
     */
    void (*texture_barrier)(struct pipe_context *);
    
+   /**
+    * Creates a video decoder for a specific video codec/profile
+    */
+   struct pipe_video_decoder *(*create_video_decoder)( struct pipe_context *context,
+                                                       enum pipe_video_profile profile,
+                                                       enum pipe_video_entrypoint entrypoint,
+                                                       enum pipe_video_chroma_format chroma_format,
+                                                       unsigned width, unsigned height );
+
    /**
     * Creates a video buffer as decoding target
     */
diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index 28209346c78..b77cf24d542 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -100,9 +100,6 @@ struct pipe_screen {
 
    struct pipe_context * (*context_create)( struct pipe_screen *, void *priv );
 
-   struct pipe_video_context * (*video_context_create)( struct pipe_screen *screen,
-                                                        struct pipe_context *context );
-
    /**
     * Check if the given pipe_format is supported as a texture or
     * drawing surface.
diff --git a/src/gallium/include/pipe/p_video_context.h b/src/gallium/include/pipe/p_video_context.h
deleted file mode 100644
index 0ac0c4bed0b..00000000000
--- a/src/gallium/include/pipe/p_video_context.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef PIPE_VIDEO_CONTEXT_H
-#define PIPE_VIDEO_CONTEXT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <pipe/p_video_state.h>
-
-struct pipe_screen;
-struct pipe_surface;
-struct pipe_macroblock;
-struct pipe_picture_desc;
-struct pipe_fence_handle;
-
-/**
- * Gallium video rendering context
- */
-struct pipe_video_context
-{
-   struct pipe_screen *screen;
-
-   /**
-    * destroy context, all objects created from this context
-    * (buffers, decoders, compositors etc...) must be freed before calling this
-    */
-   void (*destroy)(struct pipe_video_context *context);
-
-   /**
-    * create a decoder for a specific video profile
-    */
-   struct pipe_video_decoder *(*create_decoder)(struct pipe_video_context *context,
-                                                enum pipe_video_profile profile,
-                                                enum pipe_video_entrypoint entrypoint,
-                                                enum pipe_video_chroma_format chroma_format,
-                                                unsigned width, unsigned height);
-
-};
-
-/**
- * decoder for a specific video codec
- */
-struct pipe_video_decoder
-{
-   struct pipe_video_context *context;
-
-   enum pipe_video_profile profile;
-   enum pipe_video_entrypoint entrypoint;
-   enum pipe_video_chroma_format chroma_format;
-   unsigned width;
-   unsigned height;
-
-   /**
-    * destroy this video decoder
-    */
-   void (*destroy)(struct pipe_video_decoder *decoder);
-
-   /**
-    * Creates a buffer as decoding input
-    */
-   struct pipe_video_decode_buffer *(*create_buffer)(struct pipe_video_decoder *decoder);
-
-   /**
-    * flush decoder buffer to video hardware
-    */
-   void (*flush_buffer)(struct pipe_video_decode_buffer *decbuf,
-                        unsigned num_ycbcr_blocks[3],
-                        struct pipe_video_buffer *ref_frames[2],
-                        struct pipe_video_buffer *dst);
-};
-
-/**
- * input buffer for a decoder
- */
-struct pipe_video_decode_buffer
-{
-   struct pipe_video_decoder *decoder;
-
-   /**
-    * destroy this decode buffer
-    */
-   void (*destroy)(struct pipe_video_decode_buffer *decbuf);
-
-   /**
-    * map the input buffer into memory before starting decoding
-    */
-   void (*begin_frame)(struct pipe_video_decode_buffer *decbuf);
-
-   /**
-    * set the quantification matrixes
-    */
-   void (*set_quant_matrix)(struct pipe_video_decode_buffer *decbuf,
-                            const uint8_t intra_matrix[64],
-                            const uint8_t non_intra_matrix[64]);
-
-   /**
-    * get the pointer where to put the ycbcr blocks of a component
-    */
-   struct pipe_ycbcr_block *(*get_ycbcr_stream)(struct pipe_video_decode_buffer *, int component);
-
-   /**
-    * get the pointer where to put the ycbcr dct block data of a component
-    */
-   short *(*get_ycbcr_buffer)(struct pipe_video_decode_buffer *, int component);
-
-   /**
-    * get the stride of the mv buffer
-    */
-   unsigned (*get_mv_stream_stride)(struct pipe_video_decode_buffer *decbuf);
-
-   /**
-    * get the pointer where to put the motion vectors of a ref frame
-    */
-   struct pipe_motionvector *(*get_mv_stream)(struct pipe_video_decode_buffer *decbuf, int ref_frame);
-
-   /**
-    * decode a bitstream
-    */
-   void (*decode_bitstream)(struct pipe_video_decode_buffer *decbuf,
-                            unsigned num_bytes, const void *data,
-                            struct pipe_mpeg12_picture_desc *picture,
-                            unsigned num_ycbcr_blocks[3]);
-
-   /**
-    * unmap decoder buffer before flushing
-    */
-   void (*end_frame)(struct pipe_video_decode_buffer *decbuf);
-};
-
-/**
- * output for decoding / input for displaying
- */
-struct pipe_video_buffer
-{
-   struct pipe_context *context;
-
-   enum pipe_format buffer_format;
-   enum pipe_video_chroma_format chroma_format;
-   unsigned width;
-   unsigned height;
-
-   /**
-    * destroy this video buffer
-    */
-   void (*destroy)(struct pipe_video_buffer *buffer);
-
-   /**
-    * get a individual sampler view for each plane
-    */
-   struct pipe_sampler_view **(*get_sampler_view_planes)(struct pipe_video_buffer *buffer);
-
-   /**
-    * get a individual sampler view for each component
-    */
-   struct pipe_sampler_view **(*get_sampler_view_components)(struct pipe_video_buffer *buffer);
-
-   /**
-    * get a individual surfaces for each plane
-    */
-   struct pipe_surface **(*get_surfaces)(struct pipe_video_buffer *buffer);
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* PIPE_VIDEO_CONTEXT_H */
diff --git a/src/gallium/include/pipe/p_video_decoder.h b/src/gallium/include/pipe/p_video_decoder.h
new file mode 100644
index 00000000000..deda992a36c
--- /dev/null
+++ b/src/gallium/include/pipe/p_video_decoder.h
@@ -0,0 +1,170 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Younes Manton.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef PIPE_VIDEO_CONTEXT_H
+#define PIPE_VIDEO_CONTEXT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <pipe/p_video_state.h>
+
+struct pipe_screen;
+struct pipe_surface;
+struct pipe_macroblock;
+struct pipe_picture_desc;
+struct pipe_fence_handle;
+
+/**
+ * Gallium video decoder for a specific codec/profile
+ */
+struct pipe_video_decoder
+{
+   struct pipe_context *context;
+
+   enum pipe_video_profile profile;
+   enum pipe_video_entrypoint entrypoint;
+   enum pipe_video_chroma_format chroma_format;
+   unsigned width;
+   unsigned height;
+
+   /**
+    * destroy this video decoder
+    */
+   void (*destroy)(struct pipe_video_decoder *decoder);
+
+   /**
+    * Creates a buffer as decoding input
+    */
+   struct pipe_video_decode_buffer *(*create_buffer)(struct pipe_video_decoder *decoder);
+
+   /**
+    * flush decoder buffer to video hardware
+    */
+   void (*flush_buffer)(struct pipe_video_decode_buffer *decbuf,
+                        unsigned num_ycbcr_blocks[3],
+                        struct pipe_video_buffer *ref_frames[2],
+                        struct pipe_video_buffer *dst);
+};
+
+/**
+ * input buffer for a decoder
+ */
+struct pipe_video_decode_buffer
+{
+   struct pipe_video_decoder *decoder;
+
+   /**
+    * destroy this decode buffer
+    */
+   void (*destroy)(struct pipe_video_decode_buffer *decbuf);
+
+   /**
+    * map the input buffer into memory before starting decoding
+    */
+   void (*begin_frame)(struct pipe_video_decode_buffer *decbuf);
+
+   /**
+    * set the intra and non-intra quantization matrices
+    */
+   void (*set_quant_matrix)(struct pipe_video_decode_buffer *decbuf,
+                            const uint8_t intra_matrix[64],
+                            const uint8_t non_intra_matrix[64]);
+
+   /**
+    * get the pointer where to put the ycbcr blocks of a component
+    */
+   struct pipe_ycbcr_block *(*get_ycbcr_stream)(struct pipe_video_decode_buffer *, int component);
+
+   /**
+    * get the pointer where to put the ycbcr dct block data of a component
+    */
+   short *(*get_ycbcr_buffer)(struct pipe_video_decode_buffer *, int component);
+
+   /**
+    * get the stride of the motion vector (mv) buffer
+    */
+   unsigned (*get_mv_stream_stride)(struct pipe_video_decode_buffer *decbuf);
+
+   /**
+    * get the pointer where to put the motion vectors of a ref frame
+    */
+   struct pipe_motionvector *(*get_mv_stream)(struct pipe_video_decode_buffer *decbuf, int ref_frame);
+
+   /**
+    * decode a bitstream
+    */
+   void (*decode_bitstream)(struct pipe_video_decode_buffer *decbuf,
+                            unsigned num_bytes, const void *data,
+                            struct pipe_mpeg12_picture_desc *picture,
+                            unsigned num_ycbcr_blocks[3]);
+
+   /**
+    * unmap decoder buffer before flushing
+    */
+   void (*end_frame)(struct pipe_video_decode_buffer *decbuf);
+};
+
+/**
+ * output for decoding / input for displaying
+ */
+struct pipe_video_buffer
+{
+   struct pipe_context *context;
+
+   enum pipe_format buffer_format;
+   enum pipe_video_chroma_format chroma_format;
+   unsigned width;
+   unsigned height;
+
+   /**
+    * destroy this video buffer
+    */
+   void (*destroy)(struct pipe_video_buffer *buffer);
+
+   /**
+    * get an individual sampler view for each plane
+    */
+   struct pipe_sampler_view **(*get_sampler_view_planes)(struct pipe_video_buffer *buffer);
+
+   /**
+    * get an individual sampler view for each component
+    */
+   struct pipe_sampler_view **(*get_sampler_view_components)(struct pipe_video_buffer *buffer);
+
+   /**
+    * get an individual surface for each plane
+    */
+   struct pipe_surface **(*get_surfaces)(struct pipe_video_buffer *buffer);
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PIPE_VIDEO_CONTEXT_H */
diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 8458864cfc1..4d01fe6a68e 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -25,8 +25,6 @@
  *
  **************************************************************************/
 
-#include <pipe/p_video_context.h>
-
 #include <util/u_memory.h>
 #include <util/u_math.h>
 #include <util/u_debug.h>
@@ -41,7 +39,7 @@ vlVdpDecoderCreate(VdpDevice device,
                    VdpDecoder *decoder)
 {
    enum pipe_video_profile p_profile;
-   struct pipe_video_context *vpipe;
+   struct pipe_context *pipe;
    vlVdpDevice *dev;
    vlVdpDecoder *vldecoder;
    VdpStatus ret;
@@ -63,7 +61,7 @@ vlVdpDecoderCreate(VdpDevice device,
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
 
-   vpipe = dev->context->vpipe;
+   pipe = dev->context->pipe;
 
    vldecoder = CALLOC(1,sizeof(vlVdpDecoder));
    if (!vldecoder)
@@ -72,9 +70,9 @@ vlVdpDecoderCreate(VdpDevice device,
    vldecoder->device = dev;
 
    // TODO: Define max_references. Used mainly for H264
-   vldecoder->decoder = vpipe->create_decoder
+   vldecoder->decoder = pipe->create_video_decoder
    (
-      vpipe, p_profile,
+      pipe, p_profile,
       PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
       PIPE_VIDEO_CHROMA_FORMAT_420,
       width, height
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 30c6b7aae4b..41248cde705 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -26,7 +26,6 @@
  **************************************************************************/
 
 #include <pipe/p_compiler.h>
-#include <pipe/p_video_context.h>
 
 #include <util/u_memory.h>
 #include <util/u_debug.h>
diff --git a/src/gallium/state_trackers/vdpau/mixer.c b/src/gallium/state_trackers/vdpau/mixer.c
index ea6d50d7457..d5187006bfc 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -44,7 +44,6 @@ vlVdpVideoMixerCreate(VdpDevice device,
                       VdpVideoMixer *mixer)
 {
    vlVdpVideoMixer *vmixer = NULL;
-   struct pipe_video_context *context;
    VdpStatus ret;
    float csc[16];
 
@@ -54,8 +53,6 @@ vlVdpVideoMixerCreate(VdpDevice device,
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
 
-   context = dev->context->vpipe;
-
    vmixer = CALLOC(1, sizeof(vlVdpVideoMixer));
    if (!vmixer)
       return VDP_STATUS_RESOURCES;
diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index b45f699b83f..fc9e02ded47 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -41,7 +41,6 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
                          VdpOutputSurface  *surface)
 {
    struct pipe_context *pipe;
-   struct pipe_video_context *context;
    struct pipe_resource res_tmpl, *res;
    struct pipe_sampler_view sv_templ;
    struct pipe_surface surf_templ;
@@ -57,8 +56,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
       return VDP_STATUS_INVALID_HANDLE;
 
    pipe = dev->context->pipe;
-   context = dev->context->vpipe;
-   if (!pipe || !context)
+   if (!pipe)
       return VDP_STATUS_INVALID_HANDLE;
 
    vlsurface = CALLOC(1, sizeof(vlVdpOutputSurface));
@@ -76,7 +74,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
    res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
    res_tmpl.usage = PIPE_USAGE_STATIC;
 
-   res = context->screen->resource_create(context->screen, &res_tmpl);
+   res = pipe->screen->resource_create(pipe->screen, &res_tmpl);
    if (!res) {
       FREE(dev);
       return VDP_STATUS_ERROR;
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 0f87ca78972..16beb289c42 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -40,7 +40,6 @@ vlVdpPresentationQueueCreate(VdpDevice device,
                              VdpPresentationQueue *presentation_queue)
 {
    vlVdpPresentationQueue *pq = NULL;
-   struct pipe_video_context *context;
    VdpStatus ret;
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Creating PresentationQueue\n");
@@ -59,8 +58,6 @@ vlVdpPresentationQueueCreate(VdpDevice device,
    if (dev != pqt->device)
       return VDP_STATUS_HANDLE_DEVICE_MISMATCH;
 
-   context = dev->context->vpipe;
-
    pq = CALLOC(1, sizeof(vlVdpPresentationQueue));
    if (!pq)
       return VDP_STATUS_RESOURCES;
@@ -175,9 +172,9 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
    vl_compositor_render(&pq->compositor, PIPE_MPEG12_PICTURE_TYPE_FRAME,
                         drawable_surface, NULL, NULL);
 
-   pq->device->context->vpipe->screen->flush_frontbuffer
+   pq->device->context->pipe->screen->flush_frontbuffer
    (
-      pq->device->context->vpipe->screen,
+      pq->device->context->pipe->screen,
       drawable_surface->texture,
       0, 0,
       vl_contextprivate_get(pq->device->context, drawable_surface)
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index b8c4d2cd150..877d0259c56 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -28,7 +28,6 @@
 
 #include <assert.h>
 
-#include <pipe/p_video_context.h>
 #include <pipe/p_state.h>
 
 #include <util/u_memory.h>
@@ -161,7 +160,6 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
 {
    enum pipe_format pformat = FormatToPipe(source_ycbcr_format);
    struct pipe_context *pipe;
-   struct pipe_video_context *context;
    struct pipe_sampler_view **sampler_views;
    unsigned i;
 
@@ -173,8 +171,7 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
       return VDP_STATUS_INVALID_HANDLE;
 
    pipe = p_surf->device->context->pipe;
-   context = p_surf->device->context->vpipe;
-   if (!pipe && !context)
+   if (!pipe)
       return VDP_STATUS_INVALID_HANDLE;
 
    if (p_surf->video_buffer == NULL || pformat != p_surf->video_buffer->buffer_format) {
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index ada17dfadc9..8a97c99bda9 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -34,7 +34,7 @@
 #include <vdpau/vdpau_x11.h>
 
 #include <pipe/p_compiler.h>
-#include <pipe/p_video_context.h>
+#include <pipe/p_video_decoder.h>
 
 #include <util/u_debug.h>
 #include <vl/vl_compositor.h>
diff --git a/src/gallium/state_trackers/xorg/xvmc/context.c b/src/gallium/state_trackers/xorg/xvmc/context.c
index 7b74825b37e..f21ebda76d3 100644
--- a/src/gallium/state_trackers/xorg/xvmc/context.c
+++ b/src/gallium/state_trackers/xorg/xvmc/context.c
@@ -31,7 +31,7 @@
 #include <X11/extensions/XvMClib.h>
 
 #include <pipe/p_screen.h>
-#include <pipe/p_video_context.h>
+#include <pipe/p_video_decoder.h>
 #include <pipe/p_video_state.h>
 #include <pipe/p_state.h>
 
@@ -244,13 +244,14 @@ Status XvMCCreateContext(Display *dpy, XvPortID port, int surface_type_id,
       return BadAlloc;
    }
 
-   context_priv->decoder = vctx->vpipe->create_decoder(vctx->vpipe,
-                                                       ProfileToPipe(mc_type),
-                                                       (mc_type & XVMC_IDCT) ?
-                                                          PIPE_VIDEO_ENTRYPOINT_IDCT :
-                                                          PIPE_VIDEO_ENTRYPOINT_MC,
-                                                       FormatToPipe(chroma_format),
-                                                       width, height);
+   context_priv->decoder = vctx->pipe->create_video_decoder
+   (
+      vctx->pipe,
+      ProfileToPipe(mc_type),
+      (mc_type & XVMC_IDCT) ? PIPE_VIDEO_ENTRYPOINT_IDCT : PIPE_VIDEO_ENTRYPOINT_MC,
+      FormatToPipe(chroma_format),
+      width, height
+   );
 
    if (!context_priv->decoder) {
       XVMC_MSG(XVMC_ERR, "[XvMC] Could not create VL decoder.\n");
diff --git a/src/gallium/state_trackers/xorg/xvmc/subpicture.c b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
index 4ecb0e1f887..7d6ff061eb7 100644
--- a/src/gallium/state_trackers/xorg/xvmc/subpicture.c
+++ b/src/gallium/state_trackers/xorg/xvmc/subpicture.c
@@ -32,7 +32,7 @@
 #include <xorg/fourcc.h>
 
 #include <pipe/p_screen.h>
-#include <pipe/p_video_context.h>
+#include <pipe/p_video_decoder.h>
 #include <pipe/p_state.h>
 
 #include <util/u_memory.h>
@@ -224,7 +224,6 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    XvMCContextPrivate *context_priv;
    XvMCSubpicturePrivate *subpicture_priv;
    struct pipe_context *pipe;
-   struct pipe_video_context *vpipe;
    struct pipe_resource tex_templ, *tex;
    struct pipe_sampler_view sampler_templ;
    Status ret;
@@ -238,7 +237,6 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
 
    context_priv = context->privData;
    pipe = context_priv->vctx->pipe;
-   vpipe = context_priv->vctx->vpipe;
 
    if (!subpicture)
       return XvMCBadSubpicture;
@@ -259,9 +257,9 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    tex_templ.target = PIPE_TEXTURE_2D;
    tex_templ.format = XvIDToPipe(xvimage_id);
    tex_templ.last_level = 0;
-   if (vpipe->screen->get_video_param(vpipe->screen,
-                                      PIPE_VIDEO_PROFILE_UNKNOWN,
-                                      PIPE_VIDEO_CAP_NPOT_TEXTURES)) {
+   if (pipe->screen->get_video_param(pipe->screen,
+                                     PIPE_VIDEO_PROFILE_UNKNOWN,
+                                     PIPE_VIDEO_CAP_NPOT_TEXTURES)) {
       tex_templ.width0 = width;
       tex_templ.height0 = height;
    }
@@ -275,7 +273,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
    tex_templ.bind = PIPE_BIND_SAMPLER_VIEW;
    tex_templ.flags = 0;
 
-   tex = vpipe->screen->resource_create(vpipe->screen, &tex_templ);
+   tex = pipe->screen->resource_create(pipe->screen, &tex_templ);
 
    memset(&sampler_templ, 0, sizeof(sampler_templ));
    u_sampler_view_default_template(&sampler_templ, tex, tex->format);
@@ -305,7 +303,7 @@ Status XvMCCreateSubpicture(Display *dpy, XvMCContext *context, XvMCSubpicture *
       tex_templ.height0 = 1;
       tex_templ.usage = PIPE_USAGE_STATIC;
 
-      tex = vpipe->screen->resource_create(vpipe->screen, &tex_templ);
+      tex = pipe->screen->resource_create(pipe->screen, &tex_templ);
 
       memset(&sampler_templ, 0, sizeof(sampler_templ));
       u_sampler_view_default_template(&sampler_templ, tex, tex->format);
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 0370a6e858f..e8ca8152e7a 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -30,7 +30,7 @@
 
 #include <X11/Xlibint.h>
 
-#include <pipe/p_video_context.h>
+#include <pipe/p_video_decoder.h>
 #include <pipe/p_video_state.h>
 #include <pipe/p_state.h>
 
@@ -304,7 +304,6 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
 
    XvMCContextPrivate *context_priv;
    struct pipe_context *pipe;
-   struct pipe_video_context *vpipe;
    XvMCSurfacePrivate *surface_priv;
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Creating surface %p.\n", surface);
@@ -318,7 +317,6 @@ Status XvMCCreateSurface(Display *dpy, XvMCContext *context, XvMCSurface *surfac
 
    context_priv = context->privData;
    pipe = context_priv->vctx->pipe;
-   vpipe = context_priv->vctx->vpipe;
 
    surface_priv = CALLOC(1, sizeof(XvMCSurfacePrivate));
    if (!surface_priv)
@@ -357,10 +355,8 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
                          XvMCMacroBlockArray *macroblocks, XvMCBlockArray *blocks
 )
 {
-   struct pipe_video_context *vpipe;
    struct pipe_video_decode_buffer *t_buffer;
 
-   XvMCContextPrivate *context_priv;
    XvMCSurfacePrivate *target_surface_priv;
    XvMCSurfacePrivate *past_surface_priv;
    XvMCSurfacePrivate *future_surface_priv;
@@ -406,9 +402,6 @@ Status XvMCRenderSurface(Display *dpy, XvMCContext *context, unsigned int pictur
    assert(!past_surface || past_surface_priv->context == context);
    assert(!future_surface || future_surface_priv->context == context);
 
-   context_priv = context->privData;
-   vpipe = context_priv->vctx->vpipe;
-
    t_buffer = target_surface_priv->decode_buffer;
 
    // enshure that all reference frames are flushed
@@ -496,7 +489,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 {
    static int dump_window = -1;
 
-   struct pipe_video_context *vpipe;
+   struct pipe_context *pipe;
    struct vl_compositor *compositor;
 
    XvMCSurfacePrivate *surface_priv;
@@ -522,7 +515,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    assert(srcy + srch - 1 < surface->height);
 
    subpicture_priv = surface_priv->subpicture ? surface_priv->subpicture->privData : NULL;
-   vpipe = context_priv->vctx->vpipe;
+   pipe = context_priv->vctx->pipe;
    compositor = &context_priv->compositor;
 
    if (!context_priv->drawable_surface ||
@@ -571,15 +564,15 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
    }
 
    // Workaround for r600g, there seems to be a bug in the fence refcounting code
-   vpipe->screen->fence_reference(vpipe->screen, &surface_priv->fence, NULL);
+   pipe->screen->fence_reference(pipe->screen, &surface_priv->fence, NULL);
 
    vl_compositor_render(compositor, PictureToPipe(flags), context_priv->drawable_surface, &dst_rect, &surface_priv->fence);
 
    XVMC_MSG(XVMC_TRACE, "[XvMC] Submitted surface %p for display. Pushing to front buffer.\n", surface);
 
-   vpipe->screen->flush_frontbuffer
+   pipe->screen->flush_frontbuffer
    (
-      vpipe->screen,
+      pipe->screen,
       context_priv->drawable_surface->texture,
       0, 0,
       vl_contextprivate_get(context_priv->vctx, context_priv->drawable_surface)
@@ -606,7 +599,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
 PUBLIC
 Status XvMCGetSurfaceStatus(Display *dpy, XvMCSurface *surface, int *status)
 {
-   struct pipe_video_context *vpipe;
+   struct pipe_context *pipe;
    XvMCSurfacePrivate *surface_priv;
    XvMCContextPrivate *context_priv;
 
@@ -619,12 +612,12 @@ Status XvMCGetSurfaceStatus(Display *dpy, XvMCSurface *surface, int *status)
 
    surface_priv = surface->privData;
    context_priv = surface_priv->context->privData;
-   vpipe = context_priv->vctx->vpipe;
+   pipe = context_priv->vctx->pipe;
 
    *status = 0;
 
    if (surface_priv->fence)
-      if (!vpipe->screen->fence_signalled(vpipe->screen, surface_priv->fence))
+      if (!pipe->screen->fence_signalled(pipe->screen, surface_priv->fence))
          *status |= XVMC_RENDERING;
 
    return Success;
diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index 1d066f826db..f854e924fa4 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -27,11 +27,13 @@
 
 #include <vl_winsys.h>
 #include <driclient.h>
-#include <pipe/p_video_context.h>
+#include <pipe/p_screen.h>
+#include <pipe/p_context.h>
 #include <pipe/p_state.h>
 #include <util/u_memory.h>
 #include <util/u_hash.h>
 #include <util/u_hash_table.h>
+#include <util/u_inlines.h>
 #include <state_tracker/drm_driver.h>
 #include <X11/Xlibint.h>
 
@@ -237,13 +239,6 @@ vl_video_create(struct vl_screen *vscreen)
    struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)vscreen;
    struct vl_dri_context *vl_dri_ctx;
 
-   if (!vscreen->pscreen->video_context_create) {
-      debug_printf("[G3DVL] No video support found on %s/%s.\n",
-                   vscreen->pscreen->get_vendor(vscreen->pscreen),
-                   vscreen->pscreen->get_name(vscreen->pscreen));
-      goto no_vpipe;
-   }
-
    vl_dri_ctx = CALLOC_STRUCT(vl_dri_context);
    if (!vl_dri_ctx)
       goto no_struct;
@@ -256,11 +251,6 @@ vl_video_create(struct vl_screen *vscreen)
       goto no_pipe;
    }
 
-   vl_dri_ctx->base.vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen, vl_dri_ctx->base.pipe);
-
-   if (!vl_dri_ctx->base.vpipe)
-      goto no_pipe;
-
    vl_dri_ctx->base.vscreen = vscreen;
    vl_dri_ctx->fd = vl_dri_scrn->dri_screen->fd;
 
@@ -270,7 +260,6 @@ no_pipe:
    FREE(vl_dri_ctx);
 
 no_struct:
-no_vpipe:
    return NULL;
 }
 
@@ -281,6 +270,5 @@ void vl_video_destroy(struct vl_context *vctx)
    assert(vctx);
 
    vl_dri_ctx->base.pipe->destroy(vl_dri_ctx->base.pipe);
-   vl_dri_ctx->base.vpipe->destroy(vl_dri_ctx->base.vpipe);
    FREE(vl_dri_ctx);
 }
diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h
index 2d80c1d9b32..384a8ba5f17 100644
--- a/src/gallium/winsys/g3dvl/vl_winsys.h
+++ b/src/gallium/winsys/g3dvl/vl_winsys.h
@@ -33,7 +33,6 @@
 #include <pipe/p_format.h>
 
 struct pipe_screen;
-struct pipe_video_context;
 struct pipe_surface;
 
 struct vl_screen
@@ -45,7 +44,6 @@ struct vl_context
 {
    struct vl_screen *vscreen;
    struct pipe_context *pipe;
-   struct pipe_video_context *vpipe;
 };
 
 struct vl_screen*
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 0487bd9c560..92f0bd6da90 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -28,7 +28,6 @@
 #include <X11/Xlibint.h>
 
 #include <pipe/p_state.h>
-#include <pipe/p_video_context.h>
 
 #include <util/u_memory.h>
 #include <util/u_format.h>
@@ -173,30 +172,21 @@ struct vl_context*
 vl_video_create(struct vl_screen *vscreen)
 {
    struct pipe_context *pipe;
-   struct pipe_video_context *vpipe;
    struct vl_context *vctx;
 
    assert(vscreen);
-   assert(vscreen->pscreen->video_context_create);
 
    pipe = vscreen->pscreen->context_create(vscreen->pscreen, NULL);
    if (!pipe)
       return NULL;
 
-   vpipe = vscreen->pscreen->video_context_create(vscreen->pscreen, pipe);
-   if (!vpipe) {
-      pipe->destroy(pipe);
-      return NULL;
-   }
-
    vctx = CALLOC_STRUCT(vl_context);
    if (!vctx) {
       pipe->destroy(pipe);
-      vpipe->destroy(vpipe);
       return NULL;
    }
 
-   vctx->vpipe = vpipe;
+   vctx->pipe = pipe;
    vctx->vscreen = vscreen;
 
    return vctx;
@@ -207,6 +197,5 @@ void vl_video_destroy(struct vl_context *vctx)
    assert(vctx);
 
    vctx->pipe->destroy(vctx->pipe);
-   vctx->vpipe->destroy(vctx->vpipe);
    FREE(vctx);
 }
-- 
cgit v1.2.3


From 34145ecdad8d6d3b14dc88dafd71b689c68de36c Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Fri, 8 Jul 2011 20:53:39 +0200
Subject: vdpau: add implementation of VdpDecoderGetParameters

---
 src/gallium/state_trackers/vdpau/decode.c        | 12 ++++++++++++
 src/gallium/state_trackers/vdpau/vdpau_private.h | 22 ++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/src/gallium/state_trackers/vdpau/decode.c b/src/gallium/state_trackers/vdpau/decode.c
index 4d01fe6a68e..0696278ac3e 100644
--- a/src/gallium/state_trackers/vdpau/decode.c
+++ b/src/gallium/state_trackers/vdpau/decode.c
@@ -145,6 +145,18 @@ vlVdpDecoderGetParameters(VdpDecoder decoder,
                           uint32_t *width,
                           uint32_t *height)
 {
+   vlVdpDecoder *vldecoder;
+
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] decoder get parameters called\n");
+
+   vldecoder = (vlVdpDecoder *)vlGetDataHTAB(decoder);
+   if (!vldecoder)
+      return VDP_STATUS_INVALID_HANDLE;
+      
+   *profile = PipeToProfile(vldecoder->decoder->profile);
+   *width = vldecoder->decoder->width;
+   *height = vldecoder->decoder->height;
+   
    return VDP_STATUS_OK;
 }
 
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 8a97c99bda9..8cf9ca1f4e5 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -171,6 +171,28 @@ ProfileToPipe(VdpDecoderProfile vdpau_profile)
    }
 }
 
+static inline VdpDecoderProfile
+PipeToProfile(enum pipe_video_profile p_profile)
+{
+   switch (p_profile) {
+      case PIPE_VIDEO_PROFILE_MPEG1:
+         return VDP_DECODER_PROFILE_MPEG1;
+      case PIPE_VIDEO_PROFILE_MPEG2_SIMPLE:
+         return VDP_DECODER_PROFILE_MPEG2_SIMPLE;
+      case PIPE_VIDEO_PROFILE_MPEG2_MAIN:
+         return VDP_DECODER_PROFILE_MPEG2_MAIN;
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
+         return VDP_DECODER_PROFILE_H264_BASELINE;
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN: /* Not defined in p_format.h */
+         return VDP_DECODER_PROFILE_H264_MAIN;
+      case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
+	     return VDP_DECODER_PROFILE_H264_HIGH;
+      default:
+         assert(0);
+         return -1;
+   }
+}
+
 typedef struct
 {
    Display *display;
-- 
cgit v1.2.3


From 3ed8182c76c61ae0296ac442546df142f1d75b00 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sat, 9 Jul 2011 00:00:10 +0200
Subject: [g3dvl] stop calling dri2DestroyDrawable

When switching channels with xine, xine sometimes destroys the
drawable before we get a chance to call DRI2DestroyDrawable,
resulting in an X error.
---
 src/gallium/winsys/g3dvl/dri/dri_winsys.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/src/gallium/winsys/g3dvl/dri/dri_winsys.c b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
index f854e924fa4..276731ca588 100644
--- a/src/gallium/winsys/g3dvl/dri/dri_winsys.c
+++ b/src/gallium/winsys/g3dvl/dri/dri_winsys.c
@@ -167,21 +167,6 @@ static int drawable_cmp(void *key1, void *key2)
    return d1 != d2;
 }
 
-static enum pipe_error
-drawable_destroy(void *key, void *value, void *data)
-{
-   Drawable drawable = (Drawable)key;
-   struct vl_dri_screen *vl_dri_scrn = (struct vl_dri_screen*)data;
-
-   assert(drawable != None);
-   assert(value);
-   assert(data);
-
-   dri2DestroyDrawable(vl_dri_scrn->dri_screen, drawable);
-
-   return PIPE_OK;
-}
-
 struct vl_screen*
 vl_screen_create(Display *display, int screen)
 {
@@ -226,7 +211,6 @@ void vl_screen_destroy(struct vl_screen *vscreen)
 
    assert(vscreen);
 
-   util_hash_table_foreach(vl_dri_scrn->drawable_table, drawable_destroy, vl_dri_scrn);
    util_hash_table_destroy(vl_dri_scrn->drawable_table);
    vl_dri_scrn->base.pscreen->destroy(vl_dri_scrn->base.pscreen);
    dri2DestroyScreen(vl_dri_scrn->dri_screen);
-- 
cgit v1.2.3


From 3d769619e2937cb4f2a036e82b396d9e53d65ba8 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 10 Jul 2011 13:40:04 +0200
Subject: vdpau: implement VdpOutputSurfaceGetParameters

---
 src/gallium/state_trackers/vdpau/output.c        | 14 ++++++-
 src/gallium/state_trackers/vdpau/query.c         |  2 +-
 src/gallium/state_trackers/vdpau/surface.c       |  2 +-
 src/gallium/state_trackers/vdpau/vdpau_private.h | 51 +++++++++++++++++-------
 4 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index fc9e02ded47..f67d6ccff6b 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -135,7 +135,19 @@ vlVdpOutputSurfaceGetParameters(VdpOutputSurface surface,
                                 VdpRGBAFormat *rgba_format,
                                 uint32_t *width, uint32_t *height)
 {
-   return VDP_STATUS_NO_IMPLEMENTATION;
+   vlVdpOutputSurface *vlsurface;
+
+   VDPAU_MSG(VDPAU_TRACE, "[VDPAU] getting surface parameters\n");
+        
+   vlsurface = vlGetDataHTAB(surface);
+   if (!vlsurface)
+      return VDP_STATUS_INVALID_HANDLE;
+
+   *rgba_format = PipeToFormatRGBA(vlsurface->sampler_view->texture->format);
+   *width = vlsurface->sampler_view->texture->width0;
+   *height = vlsurface->sampler_view->texture->height0;
+
+   return VDP_STATUS_OK;
 }
 
 VdpStatus
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
index abe5b8f6701..971ac672229 100644
--- a/src/gallium/state_trackers/vdpau/query.c
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -120,7 +120,7 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
 
    if (bits_ycbcr_format != VDP_YCBCR_FORMAT_Y8U8V8A8 && bits_ycbcr_format != VDP_YCBCR_FORMAT_V8U8Y8A8)
       *is_supported = vlscreen->pscreen->is_format_supported(vlscreen->pscreen,
-                                                             FormatToPipe(bits_ycbcr_format),
+                                                             FormatYCBCRToPipe(bits_ycbcr_format),
                                                              PIPE_TEXTURE_2D,
                                                              1,
                                                              PIPE_BIND_RENDER_TARGET);
diff --git a/src/gallium/state_trackers/vdpau/surface.c b/src/gallium/state_trackers/vdpau/surface.c
index 877d0259c56..d3f6b5d8bc5 100644
--- a/src/gallium/state_trackers/vdpau/surface.c
+++ b/src/gallium/state_trackers/vdpau/surface.c
@@ -158,7 +158,7 @@ vlVdpVideoSurfacePutBitsYCbCr(VdpVideoSurface surface,
                               void const *const *source_data,
                               uint32_t const *source_pitches)
 {
-   enum pipe_format pformat = FormatToPipe(source_ycbcr_format);
+   enum pipe_format pformat = FormatYCBCRToPipe(source_ycbcr_format);
    struct pipe_context *pipe;
    struct pipe_sampler_view **sampler_views;
    unsigned i;
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index 8cf9ca1f4e5..bbc426e0e27 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -84,7 +84,7 @@ PipeToChroma(enum pipe_video_chroma_format pipe_type)
 
 
 static inline enum pipe_format
-FormatToPipe(VdpYCbCrFormat vdpau_format)
+FormatYCBCRToPipe(VdpYCbCrFormat vdpau_format)
 {
    switch (vdpau_format) {
       case VDP_YCBCR_FORMAT_NV12:
@@ -106,6 +106,29 @@ FormatToPipe(VdpYCbCrFormat vdpau_format)
    return -1;
 }
 
+static inline VdpYCbCrFormat
+PipeToFormatYCBCR(enum pipe_format p_format)
+{
+   switch (p_format) {
+      case PIPE_FORMAT_NV12:
+         return VDP_YCBCR_FORMAT_NV12;
+      case PIPE_FORMAT_YV12:
+         return VDP_YCBCR_FORMAT_YV12;
+      case PIPE_FORMAT_UYVY:
+         return VDP_YCBCR_FORMAT_UYVY;
+      case PIPE_FORMAT_YUYV:
+         return VDP_YCBCR_FORMAT_YUYV;
+      //case PIPE_FORMAT_YUVA:
+        // return VDP_YCBCR_FORMAT_Y8U8V8A8;
+      case PIPE_FORMAT_VUYA:
+	 return VDP_YCBCR_FORMAT_V8U8Y8A8;
+      default:
+         assert(0);
+   }
+
+   return -1;
+}
+
 static inline enum pipe_format
 FormatRGBAToPipe(VdpRGBAFormat vdpau_format)
 {
@@ -127,22 +150,20 @@ FormatRGBAToPipe(VdpRGBAFormat vdpau_format)
    return -1;
 }
 
-static inline VdpYCbCrFormat
-PipeToFormat(enum pipe_format p_format)
+static inline VdpRGBAFormat
+PipeToFormatRGBA(enum pipe_format p_format)
 {
    switch (p_format) {
-      case PIPE_FORMAT_NV12:
-         return VDP_YCBCR_FORMAT_NV12;
-      case PIPE_FORMAT_YV12:
-         return VDP_YCBCR_FORMAT_YV12;
-      case PIPE_FORMAT_UYVY:
-         return VDP_YCBCR_FORMAT_UYVY;
-      case PIPE_FORMAT_YUYV:
-         return VDP_YCBCR_FORMAT_YUYV;
-      //case PIPE_FORMAT_YUVA:
-        // return VDP_YCBCR_FORMAT_Y8U8V8A8;
-      case PIPE_FORMAT_VUYA:
-	 return VDP_YCBCR_FORMAT_V8U8Y8A8;
+      case PIPE_FORMAT_A8_UNORM:
+         return VDP_RGBA_FORMAT_A8;
+      case PIPE_FORMAT_B10G10R10A2_UNORM:
+         return VDP_RGBA_FORMAT_B10G10R10A2;
+      case PIPE_FORMAT_B8G8R8A8_UNORM:
+         return VDP_RGBA_FORMAT_B8G8R8A8;
+      case PIPE_FORMAT_R10G10B10A2_UNORM:
+         return VDP_RGBA_FORMAT_R10G10B10A2;
+      case PIPE_FORMAT_R8G8B8A8_UNORM:
+         return VDP_RGBA_FORMAT_R8G8B8A8;
       default:
          assert(0);
    }
-- 
cgit v1.2.3


From cd4f18089e44872ce9e3c04ac5e808a7204ffc49 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Sun, 10 Jul 2011 14:13:08 +0200
Subject: vdpau: make capabilities query more sane

---
 src/gallium/state_trackers/vdpau/device.c        |  2 -
 src/gallium/state_trackers/vdpau/query.c         | 73 +++++++++++-------------
 src/gallium/state_trackers/vdpau/vdpau_private.h |  2 -
 3 files changed, 32 insertions(+), 45 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index 41248cde705..b032e83dc80 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -55,8 +55,6 @@ vdp_imp_device_create_x11(Display *display, int screen, VdpDevice *device,
       goto no_dev;
    }
 
-   dev->display = display;
-   dev->screen = screen;
    dev->vscreen = vl_screen_create(display, screen);
    if (!dev->vscreen) {
       ret = VDP_STATUS_RESOURCES;
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
index 971ac672229..a32fd406bf5 100644
--- a/src/gallium/state_trackers/vdpau/query.c
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -58,44 +58,36 @@ VdpStatus
 vlVdpVideoSurfaceQueryCapabilities(VdpDevice device, VdpChromaType surface_chroma_type,
                                    VdpBool *is_supported, uint32_t *max_width, uint32_t *max_height)
 {
-   struct vl_screen *vlscreen;
+   vlVdpDevice *dev;
+   struct pipe_screen *pscreen;
    uint32_t max_2d_texture_level;
-   VdpStatus ret;
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying video surfaces\n");
 
    if (!(is_supported && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
 
-   vlVdpDevice *dev = vlGetDataHTAB(device);
+   dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
 
-   vlscreen = vl_screen_create(dev->display, dev->screen);
-   if (!vlscreen)
+   pscreen = dev->vscreen->pscreen;
+   if (!pscreen)
       return VDP_STATUS_RESOURCES;
 
    /* XXX: Current limits */
    *is_supported = true;
-   if (surface_chroma_type != VDP_CHROMA_TYPE_420)  {
-	  *is_supported = false;
-	  goto no_sup;
-   }
+   if (surface_chroma_type != VDP_CHROMA_TYPE_420)
+      *is_supported = false;
 
-   max_2d_texture_level = vlscreen->pscreen->get_param( vlscreen->pscreen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS );
-   if (!max_2d_texture_level)  {
-      ret = VDP_STATUS_RESOURCES;
-	  goto no_sup;
-   }
+   max_2d_texture_level = pscreen->get_param(pscreen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+   if (!max_2d_texture_level)
+      return VDP_STATUS_RESOURCES;
 
    /* I am not quite sure if it is max_2d_texture_level-1 or just max_2d_texture_level */
    *max_width = *max_height = pow(2,max_2d_texture_level-1);
 
-   vl_screen_destroy(vlscreen);
-
    return VDP_STATUS_OK;
-   no_sup:
-   return ret;
 }
 
 VdpStatus
@@ -103,29 +95,28 @@ vlVdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities(VdpDevice device, VdpChromaTyp
                                                   VdpYCbCrFormat bits_ycbcr_format,
                                                   VdpBool *is_supported)
 {
-   struct vl_screen *vlscreen;
+   vlVdpDevice *dev;
+   struct pipe_screen *pscreen;
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying get put video surfaces\n");
 
    if (!is_supported)
       return VDP_STATUS_INVALID_POINTER;
 
-   vlVdpDevice *dev = vlGetDataHTAB(device);
+   dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
 
-   vlscreen = vl_screen_create(dev->display, dev->screen);
-   if (!vlscreen)
+   pscreen = dev->vscreen->pscreen;
+   if (!pscreen)
       return VDP_STATUS_RESOURCES;
 
-   if (bits_ycbcr_format != VDP_YCBCR_FORMAT_Y8U8V8A8 && bits_ycbcr_format != VDP_YCBCR_FORMAT_V8U8Y8A8)
-      *is_supported = vlscreen->pscreen->is_format_supported(vlscreen->pscreen,
-                                                             FormatYCBCRToPipe(bits_ycbcr_format),
-                                                             PIPE_TEXTURE_2D,
-                                                             1,
-                                                             PIPE_BIND_RENDER_TARGET);
-
-   vl_screen_destroy(vlscreen);
+   *is_supported = pscreen->is_video_format_supported
+   (
+      pscreen,
+      FormatYCBCRToPipe(bits_ycbcr_format),
+      PIPE_VIDEO_PROFILE_UNKNOWN
+   );
 
    return VDP_STATUS_OK;
 }
@@ -135,38 +126,40 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
                               VdpBool *is_supported, uint32_t *max_level, uint32_t *max_macroblocks,
                               uint32_t *max_width, uint32_t *max_height)
 {
+   vlVdpDevice *dev;
+   struct pipe_screen *pscreen;
+
    enum pipe_video_profile p_profile;
    uint32_t max_decode_width;
    uint32_t max_decode_height;
    uint32_t max_2d_texture_level;
-   struct vl_screen *vlscreen;
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying decoder\n");
 
    if (!(is_supported && max_level && max_macroblocks && max_width && max_height))
       return VDP_STATUS_INVALID_POINTER;
 
-   vlVdpDevice *dev = vlGetDataHTAB(device);
+   dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
 
-   vlscreen = vl_screen_create(dev->display, dev->screen);
-   if (!vlscreen)
+   pscreen = dev->vscreen->pscreen;
+   if (!pscreen)
       return VDP_STATUS_RESOURCES;
 
    p_profile = ProfileToPipe(profile);
    if (p_profile == PIPE_VIDEO_PROFILE_UNKNOWN)	{
-	   *is_supported = false;
-	   return VDP_STATUS_OK;
+      *is_supported = false;
+      return VDP_STATUS_OK;
    }
 
    if (p_profile != PIPE_VIDEO_PROFILE_MPEG2_SIMPLE && p_profile != PIPE_VIDEO_PROFILE_MPEG2_MAIN)  {
-	   *is_supported = false;
-	   return VDP_STATUS_OK;
+      *is_supported = false;
+      return VDP_STATUS_OK;
    }
 
    /* XXX hack, need to implement something more sane when the decoders have been implemented */
-   max_2d_texture_level = vlscreen->pscreen->get_param( vlscreen->pscreen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS );
+   max_2d_texture_level = pscreen->get_param(pscreen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
    max_decode_width = max_decode_height = pow(2,max_2d_texture_level-2);
    if (!(max_decode_width && max_decode_height))
       return VDP_STATUS_RESOURCES;
@@ -177,8 +170,6 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
    *max_level = 16;
    *max_macroblocks = (max_decode_width/16) * (max_decode_height/16);
 
-   vl_screen_destroy(vlscreen);
-
    return VDP_STATUS_OK;
 }
 
diff --git a/src/gallium/state_trackers/vdpau/vdpau_private.h b/src/gallium/state_trackers/vdpau/vdpau_private.h
index bbc426e0e27..e5d945629fb 100644
--- a/src/gallium/state_trackers/vdpau/vdpau_private.h
+++ b/src/gallium/state_trackers/vdpau/vdpau_private.h
@@ -216,8 +216,6 @@ PipeToProfile(enum pipe_video_profile p_profile)
 
 typedef struct
 {
-   Display *display;
-   int screen;
    struct vl_screen *vscreen;
    struct vl_context *context;
 } vlVdpDevice;
-- 
cgit v1.2.3


From f8898a70dfe4396993e1c69e451544fa2cd5c2e7 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 11 Jul 2011 11:06:05 +0200
Subject: [g3dvl] remove pipe_video_context from nouveau

---
 src/gallium/drivers/nvfx/Makefile             |  3 +-
 src/gallium/drivers/nvfx/nvfx_context.c       |  5 +++
 src/gallium/drivers/nvfx/nvfx_screen.c        | 18 +++++++++--
 src/gallium/drivers/nvfx/nvfx_video_context.c | 44 ---------------------------
 src/gallium/drivers/nvfx/nvfx_video_context.h | 36 ----------------------
 5 files changed, 22 insertions(+), 84 deletions(-)
 delete mode 100644 src/gallium/drivers/nvfx/nvfx_video_context.c
 delete mode 100644 src/gallium/drivers/nvfx/nvfx_video_context.h

diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile
index cd37f0111e2..a3b76ac61b1 100644
--- a/src/gallium/drivers/nvfx/Makefile
+++ b/src/gallium/drivers/nvfx/Makefile
@@ -24,8 +24,7 @@ C_SOURCES = \
 	nvfx_surface.c \
 	nvfx_transfer.c \
 	nvfx_vbo.c \
-	nvfx_vertprog.c \
-	nvfx_video_context.c
+	nvfx_vertprog.c
 
 LIBRARY_INCLUDES = \
 	$(LIBDRM_CFLAGS) \
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
index 98603bedde1..e2cdcf636f9 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -1,6 +1,8 @@
 #include "draw/draw_context.h"
 #include "pipe/p_defines.h"
 #include "util/u_framebuffer.h"
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
 
 #include "nvfx_context.h"
 #include "nvfx_screen.h"
@@ -88,6 +90,9 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
 	nvfx->pipe.clear = nvfx_clear;
 	nvfx->pipe.flush = nvfx_flush;
 
+	nvfx->pipe.create_video_decoder = vl_create_decoder;
+	nvfx->pipe.create_video_buffer = vl_video_buffer_create;
+
 	nvfx->is_nv4x = screen->is_nv4x;
 	nvfx->use_nv4x = screen->use_nv4x;
 	/* TODO: it seems that nv30 might have fixed function clipping usable with vertex programs
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 0140470d576..4901e3b2bf7 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -3,11 +3,11 @@
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_simple_screen.h"
+#include "vl/vl_video_buffer.h"
 
 #include "nouveau/nouveau_screen.h"
 #include "nouveau/nv_object.xml.h"
 #include "nvfx_context.h"
-#include "nvfx_video_context.h"
 #include "nvfx_screen.h"
 #include "nvfx_resource.h"
 #include "nvfx_tex.h"
@@ -207,6 +207,19 @@ nvfx_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_cap param)
 	}
 }
 
+static int
+nvfx_screen_get_video_param(struct pipe_screen *screen,
+				enum pipe_video_profile profile,
+				enum pipe_video_cap param)
+{
+	switch (param) {
+	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+		return 0;
+	default:
+		return 0;
+	}
+}
+
 static boolean
 nvfx_screen_is_format_supported(struct pipe_screen *pscreen,
 				     enum pipe_format format,
@@ -467,9 +480,10 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	pscreen->get_param = nvfx_screen_get_param;
 	pscreen->get_shader_param = nvfx_screen_get_shader_param;
 	pscreen->get_paramf = nvfx_screen_get_paramf;
+	pscreen->get_video_param = nvfx_screen_get_video_param;
 	pscreen->is_format_supported = nvfx_screen_is_format_supported;
+	pscreen->is_video_format_supported = vl_video_buffer_is_format_supported;
 	pscreen->context_create = nvfx_create;
-	pscreen->video_context_create = nvfx_video_create;
 
 	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 4096, &screen->fence);
 	if (ret) {
diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.c b/src/gallium/drivers/nvfx/nvfx_video_context.c
deleted file mode 100644
index ff9931b5409..00000000000
--- a/src/gallium/drivers/nvfx/nvfx_video_context.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "nvfx_video_context.h"
-#include "util/u_video.h"
-#include <vl/vl_context.h>
-
-struct pipe_video_context *
-nvfx_video_create(struct pipe_screen *screen, void *priv)
-{
-   struct pipe_context *pipe;
-
-   assert(screen);
-
-   pipe = screen->context_create(screen, priv);
-   if (!pipe)
-      return NULL;
-
-   return vl_create_context(pipe, true);
-}
diff --git a/src/gallium/drivers/nvfx/nvfx_video_context.h b/src/gallium/drivers/nvfx/nvfx_video_context.h
deleted file mode 100644
index b220b9f82dc..00000000000
--- a/src/gallium/drivers/nvfx/nvfx_video_context.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef __NVFX_VIDEO_CONTEXT_H__
-#define __NVFX_VIDEO_CONTEXT_H__
-
-#include <pipe/p_video_context.h>
-
-struct pipe_video_context *
-nvfx_video_create(struct pipe_screen *screen, void *priv);
-
-#endif
-- 
cgit v1.2.3


From e45d8958244919ccfccd72527cdf7e96e996c781 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 11 Jul 2011 11:08:45 +0200
Subject: [g3dvl] remove some stale nv40 files

---
 src/gallium/drivers/nv40/nv40_video_context.c | 44 ---------------------------
 src/gallium/drivers/nv40/nv40_video_context.h | 36 ----------------------
 2 files changed, 80 deletions(-)
 delete mode 100644 src/gallium/drivers/nv40/nv40_video_context.c
 delete mode 100644 src/gallium/drivers/nv40/nv40_video_context.h

diff --git a/src/gallium/drivers/nv40/nv40_video_context.c b/src/gallium/drivers/nv40/nv40_video_context.c
deleted file mode 100644
index cd231e434a5..00000000000
--- a/src/gallium/drivers/nv40/nv40_video_context.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#include "nv40_video_context.h"
-#include "util/u_video.h"
-#include <vl/vl_context.h>
-
-struct pipe_video_context *
-nv40_video_create(struct pipe_screen *screen, void *priv)
-{
-   struct pipe_context *pipe;
-
-   assert(screen);
-
-   pipe = screen->context_create(screen, priv);
-   if (!pipe)
-      return NULL;
-
-   return vl_create_context(pipe, true);
-}
diff --git a/src/gallium/drivers/nv40/nv40_video_context.h b/src/gallium/drivers/nv40/nv40_video_context.h
deleted file mode 100644
index d34ab7ab130..00000000000
--- a/src/gallium/drivers/nv40/nv40_video_context.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2009 Younes Manton.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
-#ifndef __NV40_VIDEO_CONTEXT_H__
-#define __NV40_VIDEO_CONTEXT_H__
-
-#include <pipe/p_video_context.h>
-
-struct pipe_video_context *
-nv40_video_create(struct pipe_screen *screen, void *priv);
-
-#endif
-- 
cgit v1.2.3


From d6eb1f82570659b7b5a562faa06d804e63a0fc64 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 11 Jul 2011 11:28:23 +0200
Subject: pipe-video: merge fixes

---
 configs/linux-dri                          |   5 +-
 configs/linux-dri-x86-64                   |   2 +-
 src/gallium/tests/trivial/.gitignore       |   3 +
 src/gallium/tests/unit/Makefile            |  49 ++++++++++++
 src/gallium/tests/unit/pipe_barrier_test.c |  86 ++++++++++++++++++++
 src/gallium/tests/unit/u_cache_test.c      | 121 +++++++++++++++++++++++++++++
 src/gallium/tests/unit/u_half_test.c       |  32 ++++++++
 7 files changed, 294 insertions(+), 4 deletions(-)
 create mode 100644 src/gallium/tests/trivial/.gitignore
 create mode 100644 src/gallium/tests/unit/Makefile
 create mode 100644 src/gallium/tests/unit/pipe_barrier_test.c
 create mode 100644 src/gallium/tests/unit/u_cache_test.c
 create mode 100644 src/gallium/tests/unit/u_half_test.c

diff --git a/configs/linux-dri b/configs/linux-dri
index e88d3f0ca72..c4132f47271 100644
--- a/configs/linux-dri
+++ b/configs/linux-dri
@@ -59,10 +59,9 @@ SRC_DIRS := glx egl $(SRC_DIRS)
 EGL_DRIVERS_DIRS = glx
 
 DRIVER_DIRS = dri
-
 GALLIUM_WINSYS_DIRS = sw sw/xlib drm/vmware drm/intel drm/i965
-GALLIUM_TARGET_DIRS = egl-swrast
-GALLIUM_STATE_TRACKERS_DIRS = egl vdpau
+GALLIUM_TARGET_DIRS = 
+GALLIUM_STATE_TRACKERS_DIRS = egl
 
 DRI_DIRS = i810 i915 i965 mach64 mga nouveau r128 r200 r300 r600 radeon \
 	savage sis tdfx unichrome swrast
diff --git a/configs/linux-dri-x86-64 b/configs/linux-dri-x86-64
index 90e6c215adb..656cf6140d7 100644
--- a/configs/linux-dri-x86-64
+++ b/configs/linux-dri-x86-64
@@ -20,5 +20,5 @@ EXTRA_LIB_PATH=-L/usr/X11R6/lib64
 # the new interface.  i810 are missing because there is no x86-64
 # system where they could *ever* be used.
 #
-DRI_DIRS = swrast
+DRI_DIRS = i915 i965 mach64 mga r128 r200 r300 radeon savage tdfx unichrome
 
diff --git a/src/gallium/tests/trivial/.gitignore b/src/gallium/tests/trivial/.gitignore
new file mode 100644
index 00000000000..af6cdedbeba
--- /dev/null
+++ b/src/gallium/tests/trivial/.gitignore
@@ -0,0 +1,3 @@
+tri
+quad-tex
+result.bmp
diff --git a/src/gallium/tests/unit/Makefile b/src/gallium/tests/unit/Makefile
new file mode 100644
index 00000000000..bb3039f3bc7
--- /dev/null
+++ b/src/gallium/tests/unit/Makefile
@@ -0,0 +1,49 @@
+# src/gallium/tests/unit/Makefile
+
+TOP = ../../../..
+include $(TOP)/configs/current
+
+INCLUDES = \
+	-I. \
+	-I$(TOP)/src/gallium/include \
+	-I$(TOP)/src/gallium/auxiliary \
+	-I$(TOP)/src/gallium/drivers \
+	-I$(TOP)/src/gallium/winsys \
+	$(PROG_INCLUDES)
+
+LINKS = \
+	$(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/winsys/sw/null/libws_null.a \
+	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
+	$(GALLIUM_AUXILIARIES) \
+	$(PROG_LINKS)
+
+SOURCES = \
+	pipe_barrier_test.c \
+	u_cache_test.c \
+	u_half_test.c \
+	u_format_test.c \
+	u_format_compatible_test.c \
+	translate_test.c
+
+
+OBJECTS = $(SOURCES:.c=.o)
+
+PROGS = $(OBJECTS:.o=)
+
+##### TARGETS #####
+
+default: $(PROGS)
+
+clean:
+	-rm -f $(PROGS)
+	-rm -f *.o
+	-rm -f result.bmp
+
+##### RULES #####
+
+$(OBJECTS): %.o: %.c
+	$(CC) -c $(INCLUDES) $(CFLAGS) $(DEFINES) $(PROG_DEFINES) $< -o $@
+
+$(PROGS): %: %.o
+	$(CC) $(LDFLAGS) $< $(LINKS) -lm -lpthread -ldl -o $@
diff --git a/src/gallium/tests/unit/pipe_barrier_test.c b/src/gallium/tests/unit/pipe_barrier_test.c
new file mode 100644
index 00000000000..f5d72b0abae
--- /dev/null
+++ b/src/gallium/tests/unit/pipe_barrier_test.c
@@ -0,0 +1,86 @@
+/**************************************************************************
+ *
+ * Copyright 2009-2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/*
+ *  Test case for pipe_barrier.
+ *
+ *  The test succeeds if no thread exits before all the other threads reach
+ *  the barrier.
+ */
+
+
+#include <stdio.h>
+
+#include "os/os_thread.h"
+#include "os/os_time.h"
+
+
+#define NUM_THREADS 10
+
+static pipe_thread threads[NUM_THREADS];
+static pipe_barrier barrier;
+static int thread_ids[NUM_THREADS];
+
+/* Worker: sleeps in proportion to its id (units as taken by os_time_sleep), meets the barrier, exits. */
+static PIPE_THREAD_ROUTINE(thread_function, thread_data)
+{
+   const int *id_ptr = (const int *) thread_data;   /* main() passes &thread_ids[i] */
+   int id = *id_ptr;
+   printf("thread %d starting\n", id);
+   os_time_sleep(id * 1000 * 1000);
+   printf("thread %d before barrier\n", id);
+   pipe_barrier_wait(&barrier);
+   printf("thread %d exiting\n", id);
+
+   return NULL;
+}
+
+/* Starts NUM_THREADS workers that all wait on one barrier, then waits for each of them. */
+int main()
+{
+   int t = 0;
+
+   printf("pipe_barrier_test starting\n");
+
+   pipe_barrier_init(&barrier, NUM_THREADS);
+
+   while (t < NUM_THREADS) {
+      thread_ids[t] = t;
+      threads[t] = pipe_thread_create(thread_function, (void *) (thread_ids + t));
+      t++;
+   }
+
+   for (t = 0; t < NUM_THREADS; t++)
+      pipe_thread_wait(threads[t]);
+
+   pipe_barrier_destroy(&barrier);
+
+   printf("pipe_barrier_test exiting\n");
+
+   return 0;
+}
diff --git a/src/gallium/tests/unit/u_cache_test.c b/src/gallium/tests/unit/u_cache_test.c
new file mode 100644
index 00000000000..0b62a765230
--- /dev/null
+++ b/src/gallium/tests/unit/u_cache_test.c
@@ -0,0 +1,121 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/*
+ * Test case for u_cache.
+ */
+
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "util/u_cache.h"
+#include "util/u_hash.h"
+
+
+typedef uint32_t cache_test_key;
+typedef uint32_t cache_test_value;
+
+/* Hash callback: CRC32 over the sizeof(cache_test_key) bytes that key points to. */
+static uint32_t cache_test_hash(const void *key)
+{
+   const size_t key_bytes = sizeof(cache_test_key);
+   return util_hash_crc32(key, key_bytes);
+}
+
+/* Destroy callback registered with util_cache_create(): frees the key and value main() malloc'ed. */
+static void cache_test_destroy(void *key, void *value)
+{
+   /* The two are separate allocations, so the release order is irrelevant. */
+   free(value);
+   free(key);
+}
+
+/* Compare callback: 0 when both arguments are the same pointer, 1 otherwise (contents are not read). */
+static int cache_test_compare(const void *key1, const void *key2)
+{
+   return key1 != key2;
+}
+
+/* Fills util_cache instances of several sizes and asserts that each stored value reads back. */
+int main(void) {
+   unsigned cache_size;
+   unsigned cache_count;
+
+   for (cache_size = 2; cache_size < (1 << 15); cache_size *= 2) {
+      for (cache_count = (cache_size << 5); cache_count < (cache_size << 10); cache_count *= 2) {
+         struct util_cache * cache;
+         cache_test_key *key;
+         cache_test_value *value_in;
+         cache_test_value *value_out;
+         unsigned i;   /* unsigned, to match cache_count in the loop bound below */
+
+         printf("Testing cache size of %u with %u values.\n", cache_size, cache_count);
+
+         cache = util_cache_create(cache_test_hash,
+                                   cache_test_compare,
+                                   cache_test_destroy,
+                                   cache_size);
+
+         /*
+          * Retrieve a value from an empty cache.
+          */
+         key = malloc(sizeof(cache_test_key));
+         *key = 0xdeadbeef;
+         value_out = (cache_test_value *) util_cache_get(cache, key);
+         assert(value_out == NULL);
+         free(key);   /* this key was never stored, so it is released here */
+
+
+         /*
+          * Repeatedly insert into and retrieve values from the cache.
+          */
+         for (i = 0; i < cache_count; i++) {
+            key = malloc(sizeof(cache_test_key));
+            value_in = malloc(sizeof(cache_test_value));
+
+            *key = rand();
+            *value_in = rand();
+            util_cache_set(cache, key, value_in);   /* not freed here; cache_test_destroy is the registered destructor */
+
+            value_out = util_cache_get(cache, key);
+            assert(value_out != NULL);
+            assert(value_in == value_out);
+            assert(*value_in == *value_out);
+         }
+
+         /*
+          * In debug builds, this will trigger a self-check by the cache of
+          * the distribution of hits in its internal cache entries.
+          */
+         util_cache_destroy(cache);
+      }
+   }
+
+   return 0;
+}
diff --git a/src/gallium/tests/unit/u_half_test.c b/src/gallium/tests/unit/u_half_test.c
new file mode 100644
index 00000000000..00bda7f50a6
--- /dev/null
+++ b/src/gallium/tests/unit/u_half_test.c
@@ -0,0 +1,32 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <float.h>
+
+#include "util/u_math.h"
+#include "util/u_half.h"
+
+/* Converts every 16-bit half pattern to float and back; exits 1 if any pattern does not round-trip. */
+int
+main(int argc, char **argv)
+{
+   unsigned i;
+   unsigned roundtrip_fails = 0;
+   for(i = 0; i < 1 << 16; ++i) {
+      uint16_t h = (uint16_t) i;
+      union fi f;
+      uint16_t rh;
+      f.ui = util_half_to_floatui(h);
+      rh = util_floatui_to_half(f.ui);
+      if(h != rh) {
+         printf("Roundtrip failed: %x -> %x = %f -> %x\n", h, f.ui, f.f, rh);
+         ++roundtrip_fails;
+      }
+   }
+
+   if(roundtrip_fails)
+      printf("Failure! %u/65536 half floats failed a conversion to float and back.\n", roundtrip_fails);
+   else
+      printf("Success!\n");
+   /* Report failure through the exit status so scripts can detect it, not only via stdout. */
+   return roundtrip_fails ? 1 : 0;
+}
-- 
cgit v1.2.3


From 358795bc0bacdba9a36bb010ef18ee1b2d086f2d Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 11 Jul 2011 11:39:11 +0200
Subject: r600g: revert "support textures with scaled number formats"

We should add those probably when merged to master.
---
 src/gallium/drivers/r600/r600_texture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index 63cacbbd50c..a6161e5f281 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -1018,7 +1018,7 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen,
 	case UTIL_FORMAT_TYPE_SIGNED:
 		if (!desc->channel[i].normalized &&
 		    desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
-			word4 |= S_038010_NUM_FORMAT_ALL(V_038010_SQ_NUM_FORMAT_SCALED);
+			goto out_unknown;
 		}
 
 		switch (desc->channel[i].size) {
-- 
cgit v1.2.3


From 96718d98deb28d5be6610ff6d827a747fddd5890 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 11 Jul 2011 12:31:54 +0200
Subject: [g3dvl] softpipe is no longer needed for dri-nouveau

---
 src/gallium/targets/dri-nouveau/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/targets/dri-nouveau/Makefile b/src/gallium/targets/dri-nouveau/Makefile
index 26c927e0a81..eb1ee859a00 100644
--- a/src/gallium/targets/dri-nouveau/Makefile
+++ b/src/gallium/targets/dri-nouveau/Makefile
@@ -11,7 +11,6 @@ PIPE_DRIVERS = \
 	$(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
 	$(TOP)/src/gallium/drivers/nv50/libnv50.a \
 	$(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
-	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
 	$(TOP)/src/gallium/drivers/nouveau/libnouveau.a
 
 C_SOURCES = \
-- 
cgit v1.2.3


From 3d1057f243efe6e2d7b11a65df6d5124c469cec7 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 11 Jul 2011 13:55:56 +0200
Subject: [g3dvl] sync SConscript with Makefile

---
 src/gallium/auxiliary/SConscript | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
index 58d78afe133..d18f55f1644 100644
--- a/src/gallium/auxiliary/SConscript
+++ b/src/gallium/auxiliary/SConscript
@@ -195,10 +195,16 @@ source = [
     'util/u_transfer.c',
     'util/u_upload_mgr.c',
     'util/u_vbuf_mgr.c',
-    'vl/vl_bitstream_parser.c',
-    'vl/vl_mpeg12_mc_renderer.c',
-    'vl/vl_compositor.c',
     'vl/vl_csc.c',
+    'vl/vl_compositor.c',
+    'vl/vl_decoder.c',
+    'vl/vl_mpeg12_decoder.c',
+    'vl/vl_mpeg12_bitstream.c',
+    'vl/vl_zscan.c',
+    'vl/vl_idct.c',
+    'vl/vl_mc.c',
+    'vl/vl_vertex_buffers.c',
+    'vl/vl_video_buffer.c',
 ]
 
 if env['llvm']:
-- 
cgit v1.2.3


From a7ec477ebc8b256381854f1cef0ec03a2d713555 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 11 Jul 2011 14:36:03 +0200
Subject: [g3dvl] revert some unintended white space changes

---
 src/gallium/include/pipe/p_screen.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h
index b77cf24d542..87fdb20510d 100644
--- a/src/gallium/include/pipe/p_screen.h
+++ b/src/gallium/include/pipe/p_screen.h
@@ -1,8 +1,8 @@
 /**************************************************************************
- *
+ * 
  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
- *
+ * 
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
  * "Software"), to deal in the Software without restriction, including
@@ -10,11 +10,11 @@
  * distribute, sub license, and/or sell copies of the Software, and to
  * permit persons to whom the Software is furnished to do so, subject to
  * the following conditions:
- *
+ * 
  * The above copyright notice and this permission notice (including the
  * next paragraph) shall be included in all copies or substantial portions
  * of the Software.
- *
+ * 
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
@@ -22,12 +22,12 @@
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
+ * 
  **************************************************************************/
 
 /**
  * @file
- *
+ * 
  * Screen, Adapter or GPU
  *
  * These are driver functions/facilities that are context independent.
@@ -96,9 +96,12 @@ struct pipe_screen {
     * Query an integer-valued capability/parameter/limit for a codec/profile
     * \param param  one of PIPE_VIDEO_CAP_x
     */
-   int (*get_video_param)( struct pipe_screen *, enum pipe_video_profile profile, enum pipe_video_cap param );
+   int (*get_video_param)( struct pipe_screen *,
+			   enum pipe_video_profile profile,
+			   enum pipe_video_cap param );
 
-   struct pipe_context * (*context_create)( struct pipe_screen *, void *priv );
+   struct pipe_context * (*context_create)( struct pipe_screen *,
+					    void *priv );
 
    /**
     * Check if the given pipe_format is supported as a texture or
@@ -110,7 +113,7 @@ struct pipe_screen {
                                    enum pipe_texture_target target,
                                    unsigned sample_count,
                                    unsigned bindings );
-                                   
+
    /**
     * Check if the given pipe_format is supported as output for this codec/profile.
     * \param profile  profile to check, may also be PIPE_VIDEO_PROFILE_UNKNOWN
-- 
cgit v1.2.3


From df5e0b9435c869f88234a69db9bfe97342b027d4 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Mon, 11 Jul 2011 16:29:02 +0200
Subject: [g3dvl] fix a whole bunch of memory leaks

---
 src/gallium/auxiliary/vl/vl_idct.c              | 12 ++++++++----
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c    |  1 +
 src/gallium/auxiliary/vl/vl_video_buffer.c      |  2 ++
 src/gallium/state_trackers/vdpau/device.c       |  3 +++
 src/gallium/state_trackers/vdpau/output.c       |  5 +++++
 src/gallium/state_trackers/vdpau/presentation.c |  2 ++
 src/gallium/state_trackers/xorg/xvmc/surface.c  |  4 ++++
 7 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 45180499e2e..744a35603d7 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -749,7 +749,8 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe,
                   struct pipe_sampler_view *matrix,
                   struct pipe_sampler_view *transpose)
 {
-   assert(idct && pipe && matrix);
+   assert(idct && pipe);
+   assert(matrix && transpose);
 
    idct->pipe = pipe;
    idct->buffer_width = buffer_width;
@@ -777,6 +778,7 @@ vl_idct_cleanup(struct vl_idct *idct)
    cleanup_state(idct);
 
    pipe_sampler_view_reference(&idct->matrix, NULL);
+   pipe_sampler_view_reference(&idct->transpose, NULL);
 }
 
 bool
@@ -784,9 +786,8 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
                     struct pipe_sampler_view *source,
                     struct pipe_sampler_view *intermediate)
 {
-   assert(buffer);
-   assert(idct);
-   assert(source);
+   assert(buffer && idct);
+   assert(source && intermediate);
 
    memset(buffer, 0, sizeof(struct vl_idct_buffer));
 
@@ -811,6 +812,9 @@ vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    cleanup_source(idct, buffer);
    cleanup_intermediate(idct, buffer);
+
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);
 }
 
 void
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index b866e0e5aec..7bb5a695e40 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -447,6 +447,7 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder)
 
    pipe_resource_reference(&dec->quads.buffer, NULL);
    pipe_resource_reference(&dec->pos.buffer, NULL);
+   pipe_resource_reference(&dec->block_num.buffer, NULL);
 
    pipe_sampler_view_reference(&dec->zscan_linear, NULL);
    pipe_sampler_view_reference(&dec->zscan_normal, NULL);
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index 49b7b50cfee..8b05749659f 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -102,6 +102,8 @@ vl_video_buffer_destroy(struct pipe_video_buffer *buffer)
       pipe_sampler_view_reference(&buf->sampler_view_components[i], NULL);
       pipe_resource_reference(&buf->resources[i], NULL);
    }
+
+   FREE(buffer);
 }
 
 static struct pipe_sampler_view **
diff --git a/src/gallium/state_trackers/vdpau/device.c b/src/gallium/state_trackers/vdpau/device.c
index b032e83dc80..200d5f62f63 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -151,6 +151,9 @@ vlVdpDeviceDestroy(VdpDevice device)
    vlVdpDevice *dev = vlGetDataHTAB(device);
    if (!dev)
       return VDP_STATUS_INVALID_HANDLE;
+      
+   vl_video_destroy(dev->context);
+   vl_screen_destroy(dev->vscreen);
 
    FREE(dev);
    vlDestroyHTAB();
diff --git a/src/gallium/state_trackers/vdpau/output.c b/src/gallium/state_trackers/vdpau/output.c
index f67d6ccff6b..bc4b39ae75c 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -88,6 +88,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
 
    vlsurface->sampler_view = pipe->create_sampler_view(pipe, res, &sv_templ);
    if (!vlsurface->sampler_view) {
+      pipe_resource_reference(&res, NULL);
       FREE(dev);
       return VDP_STATUS_ERROR;
    }
@@ -97,15 +98,19 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
    surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
    vlsurface->surface = pipe->create_surface(pipe, res, &surf_templ);
    if (!vlsurface->surface) {
+      pipe_resource_reference(&res, NULL);
       FREE(dev);
       return VDP_STATUS_ERROR;
    }
 
    *surface = vlAddDataHTAB(vlsurface);
    if (*surface == 0) {
+      pipe_resource_reference(&res, NULL);
       FREE(dev);
       return VDP_STATUS_ERROR;
    }
+   
+   pipe_resource_reference(&res, NULL);
 
    return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/vdpau/presentation.c b/src/gallium/state_trackers/vdpau/presentation.c
index 16beb289c42..1176c7a30b7 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -192,6 +192,8 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
       if (system(cmd) != 0)
          VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Dumping surface %d failed.\n", surface);
    }
+   
+   pipe_surface_reference(&drawable_surface, NULL);
 
    return VDP_STATUS_OK;
 }
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index e8ca8152e7a..aef1eff2bb3 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -522,6 +522,7 @@ Status XvMCPutSurface(Display *dpy, XvMCSurface *surface, Drawable drawable,
        context_priv->dst_rect.x != dst_rect.x || context_priv->dst_rect.y != dst_rect.y ||
        context_priv->dst_rect.w != dst_rect.w || context_priv->dst_rect.h != dst_rect.h) {
 
+      pipe_surface_reference(&context_priv->drawable_surface, NULL);
       context_priv->drawable_surface = vl_drawable_surface_get(context_priv->vctx, drawable);
       context_priv->dst_rect = dst_rect;
       vl_compositor_reset_dirty_area(compositor);
@@ -636,6 +637,9 @@ Status XvMCDestroySurface(Display *dpy, XvMCSurface *surface)
       return XvMCBadSurface;
 
    surface_priv = surface->privData;
+   
+   if (surface_priv->mapped)
+      surface_priv->decode_buffer->end_frame(surface_priv->decode_buffer);
    surface_priv->decode_buffer->destroy(surface_priv->decode_buffer);
    surface_priv->video_buffer->destroy(surface_priv->video_buffer);
    FREE(surface_priv);
-- 
cgit v1.2.3


From efc7fda4627919b5355952d955ee4a2c98505e56 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 12 Jul 2011 00:12:12 +0200
Subject: [g3dvl] add some more PIPE_VIDEO_CAPs

---
 src/gallium/auxiliary/vl/vl_decoder.c      | 12 +++++++++++
 src/gallium/auxiliary/vl/vl_decoder.h      |  6 ++++++
 src/gallium/auxiliary/vl/vl_video_buffer.c | 10 ++++++++++
 src/gallium/auxiliary/vl/vl_video_buffer.h |  6 ++++++
 src/gallium/drivers/nvfx/nvfx_screen.c     |  6 ++++++
 src/gallium/drivers/r300/r300_screen.c     | 18 +++++++++++------
 src/gallium/drivers/r600/r600_pipe.c       |  5 +++++
 src/gallium/drivers/softpipe/sp_screen.c   |  6 ++++++
 src/gallium/include/pipe/p_defines.h       |  5 ++++-
 src/gallium/state_trackers/vdpau/query.c   | 32 +++++++++++-------------------
 10 files changed, 79 insertions(+), 27 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_decoder.c b/src/gallium/auxiliary/vl/vl_decoder.c
index 2be5c17ed3e..fac03359a0f 100644
--- a/src/gallium/auxiliary/vl/vl_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_decoder.c
@@ -32,6 +32,18 @@
 #include "vl_decoder.h"
 #include "vl_mpeg12_decoder.h"
 
+/*
+ * Shader based decoding accepts only profiles that u_reduce_video_profile()
+ * maps to PIPE_VIDEO_CODEC_MPEG12; every other profile is reported unsupported.
+ */
+bool
+vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile)
+{
+   assert(screen);
+
+   return u_reduce_video_profile(profile) == PIPE_VIDEO_CODEC_MPEG12;
+}
+
 struct pipe_video_decoder *
 vl_create_decoder(struct pipe_context *pipe,
                   enum pipe_video_profile profile,
diff --git a/src/gallium/auxiliary/vl/vl_decoder.h b/src/gallium/auxiliary/vl/vl_decoder.h
index 440f5ecfb04..0e9280dbfa2 100644
--- a/src/gallium/auxiliary/vl/vl_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_decoder.h
@@ -31,6 +31,12 @@
 
 #include <pipe/p_video_decoder.h>
 
+/**
+ * check if a given profile is supported with shader based decoding
+ */
+bool
+vl_profile_supported(struct pipe_screen *screen, enum pipe_video_profile profile);
+
 /**
  * standard implementation of pipe->create_video_decoder
  */
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.c b/src/gallium/auxiliary/vl/vl_video_buffer.c
index 8b05749659f..4d8b6649dd2 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.c
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.c
@@ -88,6 +88,16 @@ vl_video_buffer_is_format_supported(struct pipe_screen *screen,
    return true;
 }
 
+/* Maximum video buffer width/height: 2^(levels-1) for the reported 2D texture level count, else 0. */
+unsigned
+vl_video_buffer_max_size(struct pipe_screen *screen)
+{
+   uint32_t levels = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
+
+   /* A shift count that is negative or >= 32 is undefined, so such level counts report 0. */
+   return (levels >= 1 && levels <= 32) ? 1u << (levels - 1) : 0;
+}
+
 static void
 vl_video_buffer_destroy(struct pipe_video_buffer *buffer)
 {
diff --git a/src/gallium/auxiliary/vl/vl_video_buffer.h b/src/gallium/auxiliary/vl/vl_video_buffer.h
index 78aac3fa0f2..291d15c1e9d 100644
--- a/src/gallium/auxiliary/vl/vl_video_buffer.h
+++ b/src/gallium/auxiliary/vl/vl_video_buffer.h
@@ -54,6 +54,12 @@ struct vl_video_buffer
 const enum pipe_format *
 vl_video_buffer_formats(struct pipe_screen *screen, enum pipe_format format);
 
+/**
+ * get maximum size of video buffers
+ */
+unsigned
+vl_video_buffer_max_size(struct pipe_screen *screen);
+
 /**
  * check if video buffer format is supported for a codec/profile
  * can be used as default implementation of screen->is_video_format_supported
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 4901e3b2bf7..d85c0a6da41 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -3,6 +3,7 @@
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_simple_screen.h"
+#include "vl/vl_decoder.h"
 #include "vl/vl_video_buffer.h"
 
 #include "nouveau/nouveau_screen.h"
@@ -213,8 +214,13 @@ nvfx_screen_get_video_param(struct pipe_screen *screen,
 				enum pipe_video_cap param)
 {
 	switch (param) {
+	case PIPE_VIDEO_CAP_SUPPORTED:
+		return vl_profile_supported(screen, profile);
 	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
 		return 0;
+	case PIPE_VIDEO_CAP_MAX_WIDTH:
+	case PIPE_VIDEO_CAP_MAX_HEIGHT:
+		return vl_video_buffer_max_size(screen);
 	default:
 		return 0;
 	}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 19b273f4f49..c8df45fb3e7 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -25,6 +25,7 @@
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
 #include "os/os_time.h"
+#include "vl/vl_decoder.h"
 #include "vl/vl_video_buffer.h"
 
 #include "r300_context.h"
@@ -307,12 +308,17 @@ static int r300_get_video_param(struct pipe_screen *screen,
 				enum pipe_video_profile profile,
 				enum pipe_video_cap param)
 {
-	switch (param) {
-	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
-		return 0;
-	default:
-		return 0;
-	}
+   switch (param) {
+      case PIPE_VIDEO_CAP_SUPPORTED:
+         return vl_profile_supported(screen, profile);
+      case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+         return 0;
+      case PIPE_VIDEO_CAP_MAX_WIDTH:
+      case PIPE_VIDEO_CAP_MAX_HEIGHT:
+         return vl_video_buffer_max_size(screen);
+      default:
+         return 0;
+   }
 }
 
 static boolean r300_is_format_supported(struct pipe_screen* screen,
diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 8e492787235..65b12de79b1 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -491,8 +491,13 @@ static int r600_get_video_param(struct pipe_screen *screen,
 				enum pipe_video_cap param)
 {
 	switch (param) {
+	case PIPE_VIDEO_CAP_SUPPORTED:
+		return vl_profile_supported(screen, profile);
 	case PIPE_VIDEO_CAP_NPOT_TEXTURES:
 		return 1;
+	case PIPE_VIDEO_CAP_MAX_WIDTH:
+	case PIPE_VIDEO_CAP_MAX_HEIGHT:
+		return vl_video_buffer_max_size(screen);
 	default:
 		return 0;
 	}
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index f952e6046f0..1e58d27be88 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -33,6 +33,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 #include "draw/draw_context.h"
+#include "vl/vl_decoder.h"
 #include "vl/vl_video_buffer.h"
 
 #include "state_tracker/sw_winsys.h"
@@ -177,8 +178,13 @@ softpipe_get_video_param(struct pipe_screen *screen,
                          enum pipe_video_cap param)
 {
    switch (param) {
+   case PIPE_VIDEO_CAP_SUPPORTED:
+       return vl_profile_supported(screen, profile);
    case PIPE_VIDEO_CAP_NPOT_TEXTURES:
       return 0;
+   case PIPE_VIDEO_CAP_MAX_WIDTH:
+   case PIPE_VIDEO_CAP_MAX_HEIGHT:
+      return vl_video_buffer_max_size(screen);
    default:
       return 0;
    }
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index d8b1a9e171f..7f1bf0dee68 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -496,7 +496,10 @@ enum pipe_shader_cap
 /* Video caps, can be different for each codec/profile */
 enum pipe_video_cap
 {
-   PIPE_VIDEO_CAP_NPOT_TEXTURES = 0,
+   PIPE_VIDEO_CAP_SUPPORTED = 0,
+   PIPE_VIDEO_CAP_NPOT_TEXTURES = 1,
+   PIPE_VIDEO_CAP_MAX_WIDTH = 2,
+   PIPE_VIDEO_CAP_MAX_HEIGHT = 3,
 };
 
 enum pipe_video_codec
diff --git a/src/gallium/state_trackers/vdpau/query.c b/src/gallium/state_trackers/vdpau/query.c
index a32fd406bf5..ec17e59118f 100644
--- a/src/gallium/state_trackers/vdpau/query.c
+++ b/src/gallium/state_trackers/vdpau/query.c
@@ -128,11 +128,7 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
 {
    vlVdpDevice *dev;
    struct pipe_screen *pscreen;
-
    enum pipe_video_profile p_profile;
-   uint32_t max_decode_width;
-   uint32_t max_decode_height;
-   uint32_t max_2d_texture_level;
 
    VDPAU_MSG(VDPAU_TRACE, "[VDPAU] Querying decoder\n");
 
@@ -152,24 +148,20 @@ vlVdpDecoderQueryCapabilities(VdpDevice device, VdpDecoderProfile profile,
       *is_supported = false;
       return VDP_STATUS_OK;
    }
-
-   if (p_profile != PIPE_VIDEO_PROFILE_MPEG2_SIMPLE && p_profile != PIPE_VIDEO_PROFILE_MPEG2_MAIN)  {
-      *is_supported = false;
-      return VDP_STATUS_OK;
+   
+   *is_supported = pscreen->get_video_param(pscreen, p_profile, PIPE_VIDEO_CAP_SUPPORTED);
+   if (*is_supported) {
+      *max_width = pscreen->get_video_param(pscreen, p_profile, PIPE_VIDEO_CAP_MAX_WIDTH); 
+      *max_height = pscreen->get_video_param(pscreen, p_profile, PIPE_VIDEO_CAP_MAX_HEIGHT);
+      *max_level = 16;
+      *max_macroblocks = (*max_width/16)*(*max_height/16);
+   } else {
+      *max_width = 0;
+      *max_height = 0;
+      *max_level = 0;
+      *max_macroblocks = 0;
    }
 
-   /* XXX hack, need to implement something more sane when the decoders have been implemented */
-   max_2d_texture_level = pscreen->get_param(pscreen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS);
-   max_decode_width = max_decode_height = pow(2,max_2d_texture_level-2);
-   if (!(max_decode_width && max_decode_height))
-      return VDP_STATUS_RESOURCES;
-
-   *is_supported = true;
-   *max_width = max_decode_width;
-   *max_height = max_decode_height;
-   *max_level = 16;
-   *max_macroblocks = (max_decode_width/16) * (max_decode_height/16);
-
    return VDP_STATUS_OK;
 }
 
-- 
cgit v1.2.3


From 5e5d7acc2f93ddad8212b00c845dbd6381ee4119 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Tue, 12 Jul 2011 11:08:12 +0200
Subject: vaapi: cleanup the source and let the st at least compile again

---
 src/gallium/state_trackers/va/ftab.c          | 194 +++++++++++++-------------
 src/gallium/state_trackers/va/htab.c          |  22 ++-
 src/gallium/state_trackers/va/va_buffer.c     |  76 +++++-----
 src/gallium/state_trackers/va/va_config.c     | 136 +++++++++---------
 src/gallium/state_trackers/va/va_context.c    | 109 +++++++--------
 src/gallium/state_trackers/va/va_display.c    |  45 +++---
 src/gallium/state_trackers/va/va_image.c      | 175 ++++++++++-------------
 src/gallium/state_trackers/va/va_picture.c    |  35 +++--
 src/gallium/state_trackers/va/va_private.h    |  19 +--
 src/gallium/state_trackers/va/va_subpicture.c | 148 +++++++++-----------
 src/gallium/state_trackers/va/va_surface.c    | 169 ++++++++++------------
 11 files changed, 517 insertions(+), 611 deletions(-)

diff --git a/src/gallium/state_trackers/va/ftab.c b/src/gallium/state_trackers/va/ftab.c
index 999287e7a7e..dc9513e2d7c 100644
--- a/src/gallium/state_trackers/va/ftab.c
+++ b/src/gallium/state_trackers/va/ftab.c
@@ -26,111 +26,111 @@
  **************************************************************************/
 
 #include <assert.h>
+
 #include <va/va.h>
 #include <va/va_backend.h>
-#include "va_private.h"
 
-struct VADriverVTable vlVaGetVtable();
+#include "va_private.h"
 
 static struct VADriverVTable vtable =
 {
-	&vlVaTerminate, /* VAStatus (*vaTerminate) ( VADriverContextP ctx ); */
-	&vlVaQueryConfigProfiles, /* VAStatus (*vaQueryConfigProfiles) ( VADriverContextP ctx, VAProfile *profile_list,int *num_profiles); */
-	&vlVaQueryConfigEntrypoints, /* VAStatus (*vaQueryConfigEntrypoints) ( VADriverContextP ctx,	VAProfile profile, VAEntrypoint  *entrypoint_list, int *num_entrypoints	); */
-	&vlVaGetConfigAttributes, /* VAStatus (*vaGetConfigAttributes) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs ); */
-	&vlVaCreateConfig, /* VAStatus (*vaCreateConfig) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint,	VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id); */
-	&vlVaDestroyConfig, /* VAStatus (*vaDestroyConfig) ( VADriverContextP ctx, VAConfigID config_id); */
-	&vlVaQueryConfigAttributes, /* VAStatus (*vaQueryConfigAttributes) ( VADriverContextP ctx, VAConfigID config_id, VAProfile *profile, VAEntrypoint *entrypoint, VAConfigAttrib *attrib_list, int *num_attribs); */
-	&vlVaCreateSurfaces, /* VAStatus (*vaCreateSurfaces) ( VADriverContextP ctx,int width,int height,int format,int num_surfaces,VASurfaceID *surfaces); */
-	&vlVaDestroySurfaces, /* VAStatus (*vaDestroySurfaces) ( VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces ); */
-	&vlVaCreateContext, /* VAStatus (*vaCreateContext) (VADriverContextP ctx,VAConfigID config_id,int picture_width,int picture_height,int flag,VASurfaceID *render_targets,int num_render_targets,VAContextID *context); */
-	&vlVaDestroyContext, /* VAStatus (*vaDestroyContext) (VADriverContextP ctx,VAContextID context); */
-	&vlVaCreateBuffer, /* VAStatus (*vaCreateBuffer) (VADriverContextP ctx,VAContextID context,VABufferType type,unsigned int size,unsigned int num_elements,void *data,VABufferID *buf_id); */
-	&vlVaBufferSetNumElements, /* VAStatus (*vaBufferSetNumElements) (VADriverContextP ctx,VABufferID buf_id,unsigned int num_elements); */
-	&vlVaMapBuffer, /* VAStatus (*vaMapBuffer) (VADriverContextP ctx,VABufferID buf_id,void **pbuf); */
-	&vlVaUnmapBuffer, /* VAStatus (*vaUnmapBuffer) (VADriverContextP ctx,VABufferID buf_id); */
-	&vlVaDestroyBuffer, /* VAStatus (*vaDestroyBuffer) (VADriverContextP ctx,VABufferID buffer_id); */
-	&vlVaBeginPicture, /* VAStatus (*vaBeginPicture) (VADriverContextP ctx,VAContextID context,VASurfaceID render_target); */
-	&vlVaRenderPicture, /* VAStatus (*vaRenderPicture) (VADriverContextP ctx,VAContextID context,VABufferID *buffers,int num_buffers); */
-	&vlVaEndPicture, /* VAStatus (*vaEndPicture) (VADriverContextP ctx,VAContextID context); */
-	&vlVaSyncSurface, /* VAStatus (*vaSyncSurface) (VADriverContextP ctx,VASurfaceID render_target); */
-	&vlVaQuerySurfaceStatus, /* VAStatus (*vaQuerySurfaceStatus) (VADriverContextP ctx,VASurfaceID render_target,VASurfaceStatus *status); */
-	&vlVaPutSurface, /* VAStatus (*vaPutSurface) (
-    		VADriverContextP ctx,
-		VASurfaceID surface,
-		void* draw,
-		short srcx,
-		short srcy,
-		unsigned short srcw,
-		unsigned short srch,
-		short destx,
-		short desty,
-		unsigned short destw,
-		unsigned short desth,
-		VARectangle *cliprects,
-		unsigned int number_cliprects,
-		unsigned int flags); */
-	&vlVaQueryImageFormats, /* VAStatus (*vaQueryImageFormats) ( VADriverContextP ctx, VAImageFormat *format_list,int *num_formats); */
-	&vlVaCreateImage, /* VAStatus (*vaCreateImage) (VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image); */
-	&vlVaDeriveImage, /* VAStatus (*vaDeriveImage) (VADriverContextP ctx,VASurfaceID surface,VAImage *image); */
-	&vlVaDestroyImage, /* VAStatus (*vaDestroyImage) (VADriverContextP ctx,VAImageID image); */
-	&vlVaSetImagePalette, /* VAStatus (*vaSetImagePalette) (VADriverContextP ctx,VAImageID image, unsigned char *palette); */
-	&vlVaGetImage, /* VAStatus (*vaGetImage) (VADriverContextP ctx,VASurfaceID surface,int x,int y,unsigned int width,unsigned int height,VAImageID image); */
-	&vlVaPutImage, /* VAStatus (*vaPutImage) (
-		VADriverContextP ctx,
-		VASurfaceID surface,
-		VAImageID image,
-		int src_x,
-		int src_y,
-		unsigned int src_width,
-		unsigned int src_height,
-		int dest_x,
-		int dest_y,
-		unsigned int dest_width,
-		unsigned int dest_height
-	); */
-	&vlVaQuerySubpictureFormats,	/* VAStatus (*vaQuerySubpictureFormats) (VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats); */
-	&vlVaCreateSubpicture, /* VAStatus (*vaCreateSubpicture) (VADriverContextP ctx,VAImageID image,VASubpictureID *subpicture); */
-	&vlVaDestroySubpicture, /* VAStatus (*vaDestroySubpicture) (VADriverContextP ctx,VASubpictureID subpicture); */
-	&vlVaSubpictureImage, /* VAStatus (*vaSetSubpictureImage) (VADriverContextP ctx,VASubpictureID subpicture,VAImageID image); */
-	&vlVaSetSubpictureChromakey, /* VAStatus (*vaSetSubpictureChromakey) (VADriverContextP ctx,VASubpictureID subpicture,unsigned int chromakey_min,unsigned int chromakey_max,unsigned int chromakey_mask); */
-	&vlVaSetSubpictureGlobalAlpha, /* VAStatus (*vaSetSubpictureGlobalAlpha) (VADriverContextP ctx,VASubpictureID subpicture,float global_alpha); */
-	&vlVaAssociateSubpicture, /* VAStatus (*vaAssociateSubpicture) (
-		VADriverContextP ctx,
-		VASubpictureID subpicture,
-		VASurfaceID *target_surfaces,
-		int num_surfaces,
-		short src_x,
-		short src_y,
-		unsigned short src_width,
-		unsigned short src_height,
-		short dest_x,
-		short dest_y,
-		unsigned short dest_width,
-		unsigned short dest_height,
-		unsigned int flags); */
-	&vlVaDeassociateSubpicture, /* VAStatus (*vaDeassociateSubpicture) (VADriverContextP ctx,VASubpictureID subpicture,VASurfaceID *target_surfaces,int num_surfaces); */
-	&vlVaQueryDisplayAttributes, /* VAStatus (*vaQueryDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int *num_attributes); */
-	&vlVaGetDisplayAttributes, /* VAStatus (*vaGetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
-	&vlVaSetDisplayAttributes, /* VAStatus (*vaSetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
-	&vlVaBufferInfo, /* VAStatus (*vaBufferInfo) (VADriverContextP ctx,VAContextID context,VABufferID buf_id,VABufferType *type,unsigned int *size,unsigned int *num_elements); */
-	&vlVaLockSurface, /* VAStatus (*vaLockSurface) (
-		VADriverContextP ctx,
-                VASurfaceID surface,
-                unsigned int *fourcc,
-                unsigned int *luma_stride,
-                unsigned int *chroma_u_stride,
-                unsigned int *chroma_v_stride,
-                unsigned int *luma_offset,
-                unsigned int *chroma_u_offset,
-                unsigned int *chroma_v_offset,
-                unsigned int *buffer_name,
-                void **buffer); */
-	&vlVaUnlockSurface, /* VAStatus (*vaUnlockSurface) (VADriverContextP ctx,VASurfaceID surface); */
-	NULL /* struct VADriverVTableGLX *glx; "Optional" */
+   &vlVaTerminate, /* VAStatus (*vaTerminate) ( VADriverContextP ctx ); */
+   &vlVaQueryConfigProfiles, /* VAStatus (*vaQueryConfigProfiles) ( VADriverContextP ctx, VAProfile *profile_list,int *num_profiles); */
+   &vlVaQueryConfigEntrypoints, /* VAStatus (*vaQueryConfigEntrypoints) ( VADriverContextP ctx,	VAProfile profile, VAEntrypoint  *entrypoint_list, int *num_entrypoints	); */
+   &vlVaGetConfigAttributes, /* VAStatus (*vaGetConfigAttributes) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint, VAConfigAttrib *attrib_list, int num_attribs ); */
+   &vlVaCreateConfig, /* VAStatus (*vaCreateConfig) ( VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint,	VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id); */
+   &vlVaDestroyConfig, /* VAStatus (*vaDestroyConfig) ( VADriverContextP ctx, VAConfigID config_id); */
+   &vlVaQueryConfigAttributes, /* VAStatus (*vaQueryConfigAttributes) ( VADriverContextP ctx, VAConfigID config_id, VAProfile *profile, VAEntrypoint *entrypoint, VAConfigAttrib *attrib_list, int *num_attribs); */
+   &vlVaCreateSurfaces, /* VAStatus (*vaCreateSurfaces) ( VADriverContextP ctx,int width,int height,int format,int num_surfaces,VASurfaceID *surfaces); */
+   &vlVaDestroySurfaces, /* VAStatus (*vaDestroySurfaces) ( VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces ); */
+   &vlVaCreateContext, /* VAStatus (*vaCreateContext) (VADriverContextP ctx,VAConfigID config_id,int picture_width,int picture_height,int flag,VASurfaceID *render_targets,int num_render_targets,VAContextID *context); */
+   &vlVaDestroyContext, /* VAStatus (*vaDestroyContext) (VADriverContextP ctx,VAContextID context); */
+   &vlVaCreateBuffer, /* VAStatus (*vaCreateBuffer) (VADriverContextP ctx,VAContextID context,VABufferType type,unsigned int size,unsigned int num_elements,void *data,VABufferID *buf_id); */
+   &vlVaBufferSetNumElements, /* VAStatus (*vaBufferSetNumElements) (VADriverContextP ctx,VABufferID buf_id,unsigned int num_elements); */
+   &vlVaMapBuffer, /* VAStatus (*vaMapBuffer) (VADriverContextP ctx,VABufferID buf_id,void **pbuf); */
+   &vlVaUnmapBuffer, /* VAStatus (*vaUnmapBuffer) (VADriverContextP ctx,VABufferID buf_id); */
+   &vlVaDestroyBuffer, /* VAStatus (*vaDestroyBuffer) (VADriverContextP ctx,VABufferID buffer_id); */
+   &vlVaBeginPicture, /* VAStatus (*vaBeginPicture) (VADriverContextP ctx,VAContextID context,VASurfaceID render_target); */
+   &vlVaRenderPicture, /* VAStatus (*vaRenderPicture) (VADriverContextP ctx,VAContextID context,VABufferID *buffers,int num_buffers); */
+   &vlVaEndPicture, /* VAStatus (*vaEndPicture) (VADriverContextP ctx,VAContextID context); */
+   &vlVaSyncSurface, /* VAStatus (*vaSyncSurface) (VADriverContextP ctx,VASurfaceID render_target); */
+   &vlVaQuerySurfaceStatus, /* VAStatus (*vaQuerySurfaceStatus) (VADriverContextP ctx,VASurfaceID render_target,VASurfaceStatus *status); */
+   &vlVaPutSurface, /* VAStatus (*vaPutSurface) (
+      VADriverContextP ctx,
+      VASurfaceID surface,
+      void* draw,
+      short srcx,
+      short srcy,
+      unsigned short srcw,
+      unsigned short srch,
+      short destx,
+      short desty,
+      unsigned short destw,
+      unsigned short desth,
+      VARectangle *cliprects,
+      unsigned int number_cliprects,
+      unsigned int flags); */
+   &vlVaQueryImageFormats, /* VAStatus (*vaQueryImageFormats) ( VADriverContextP ctx, VAImageFormat *format_list,int *num_formats); */
+   &vlVaCreateImage, /* VAStatus (*vaCreateImage) (VADriverContextP ctx,VAImageFormat *format,int width,int height,VAImage *image); */
+   &vlVaDeriveImage, /* VAStatus (*vaDeriveImage) (VADriverContextP ctx,VASurfaceID surface,VAImage *image); */
+   &vlVaDestroyImage, /* VAStatus (*vaDestroyImage) (VADriverContextP ctx,VAImageID image); */
+   &vlVaSetImagePalette, /* VAStatus (*vaSetImagePalette) (VADriverContextP ctx,VAImageID image, unsigned char *palette); */
+   &vlVaGetImage, /* VAStatus (*vaGetImage) (VADriverContextP ctx,VASurfaceID surface,int x,int y,unsigned int width,unsigned int height,VAImageID image); */
+   &vlVaPutImage, /* VAStatus (*vaPutImage) (
+      VADriverContextP ctx,
+      VASurfaceID surface,
+      VAImageID image,
+      int src_x,
+      int src_y,
+      unsigned int src_width,
+      unsigned int src_height,
+      int dest_x,
+      int dest_y,
+      unsigned int dest_width,
+      unsigned int dest_height
+   ); */
+   &vlVaQuerySubpictureFormats,	/* VAStatus (*vaQuerySubpictureFormats) (VADriverContextP ctx,VAImageFormat *format_list,unsigned int *flags,unsigned int *num_formats); */
+   &vlVaCreateSubpicture, /* VAStatus (*vaCreateSubpicture) (VADriverContextP ctx,VAImageID image,VASubpictureID *subpicture); */
+   &vlVaDestroySubpicture, /* VAStatus (*vaDestroySubpicture) (VADriverContextP ctx,VASubpictureID subpicture); */
+   &vlVaSubpictureImage, /* VAStatus (*vaSetSubpictureImage) (VADriverContextP ctx,VASubpictureID subpicture,VAImageID image); */
+   &vlVaSetSubpictureChromakey, /* VAStatus (*vaSetSubpictureChromakey) (VADriverContextP ctx,VASubpictureID subpicture,unsigned int chromakey_min,unsigned int chromakey_max,unsigned int chromakey_mask); */
+   &vlVaSetSubpictureGlobalAlpha, /* VAStatus (*vaSetSubpictureGlobalAlpha) (VADriverContextP ctx,VASubpictureID subpicture,float global_alpha); */
+   &vlVaAssociateSubpicture, /* VAStatus (*vaAssociateSubpicture) (
+      VADriverContextP ctx,
+      VASubpictureID subpicture,
+      VASurfaceID *target_surfaces,
+      int num_surfaces,
+      short src_x,
+      short src_y,
+      unsigned short src_width,
+      unsigned short src_height,
+      short dest_x,
+      short dest_y,
+      unsigned short dest_width,
+      unsigned short dest_height,
+      unsigned int flags); */
+   &vlVaDeassociateSubpicture, /* VAStatus (*vaDeassociateSubpicture) (VADriverContextP ctx,VASubpictureID subpicture,VASurfaceID *target_surfaces,int num_surfaces); */
+   &vlVaQueryDisplayAttributes, /* VAStatus (*vaQueryDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int *num_attributes); */
+   &vlVaGetDisplayAttributes, /* VAStatus (*vaGetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
+   &vlVaSetDisplayAttributes, /* VAStatus (*vaSetDisplayAttributes) (VADriverContextP ctx,VADisplayAttribute *attr_list,int num_attributes); */
+   &vlVaBufferInfo, /* VAStatus (*vaBufferInfo) (VADriverContextP ctx,VAContextID context,VABufferID buf_id,VABufferType *type,unsigned int *size,unsigned int *num_elements); */
+   &vlVaLockSurface, /* VAStatus (*vaLockSurface) (
+      VADriverContextP ctx,
+      VASurfaceID surface,
+      unsigned int *fourcc,
+      unsigned int *luma_stride,
+      unsigned int *chroma_u_stride,
+      unsigned int *chroma_v_stride,
+      unsigned int *luma_offset,
+      unsigned int *chroma_u_offset,
+      unsigned int *chroma_v_offset,
+      unsigned int *buffer_name,
+      void **buffer); */
+   &vlVaUnlockSurface, /* VAStatus (*vaUnlockSurface) (VADriverContextP ctx,VASurfaceID surface); */
+   NULL /* struct VADriverVTableGLX *glx; "Optional" */
 };
 
 struct VADriverVTable vlVaGetVtable()
 {
-	return vtable;
+   return vtable;
 }
diff --git a/src/gallium/state_trackers/va/htab.c b/src/gallium/state_trackers/va/htab.c
index 2187507c6a4..40b9edac819 100644
--- a/src/gallium/state_trackers/va/htab.c
+++ b/src/gallium/state_trackers/va/htab.c
@@ -27,24 +27,20 @@
 
 #include <util/u_handle_table.h>
 #include <os/os_thread.h>
-#include "va_private.h"
 
-boolean vlCreateHTAB(void);
-void vlDestroyHTAB(void);
-vlHandle vlAddDataHTAB(void *data);
-void* vlGetDataHTAB(vlHandle handle);
+#include "va_private.h"
 
 #ifdef VL_HANDLES
 static struct handle_table *htab = NULL;
 pipe_static_mutex(htab_lock);
 #endif
 
-boolean vlCreateHTAB(void)
+bool vlCreateHTAB(void)
 {
 #ifdef VL_HANDLES
-   boolean ret;
-   /* Make sure handle table handles match VDPAU handles. */
-   assert(sizeof(unsigned) <= sizeof(vlHandle));
+   bool ret;
+   /* Make sure handle table handles match VAAPI handles. */
+   assert(sizeof(unsigned) <= sizeof(VAGenericID));
    pipe_mutex_lock(htab_lock);
    if (!htab)
       htab = handle_table_create();
@@ -68,22 +64,22 @@ void vlDestroyHTAB(void)
 #endif
 }
 
-vlHandle vlAddDataHTAB(void *data)
+VAGenericID vlAddDataHTAB(void *data)
 {
    assert(data);
 #ifdef VL_HANDLES
-   vlHandle handle = 0;
+   VAGenericID handle = 0;
    pipe_mutex_lock(htab_lock);
    if (htab)
       handle = handle_table_add(htab, data);
    pipe_mutex_unlock(htab_lock);
    return handle;
 #else
-   return (vlHandle)data;
+   return (VAGenericID)data;
 #endif
 }
 
-void* vlGetDataHTAB(vlHandle handle)
+void* vlGetDataHTAB(VAGenericID handle)
 {
    assert(handle);
 #ifdef VL_HANDLES
diff --git a/src/gallium/state_trackers/va/va_buffer.c b/src/gallium/state_trackers/va/va_buffer.c
index 7608a4264ff..d14e06d69e4 100644
--- a/src/gallium/state_trackers/va/va_buffer.c
+++ b/src/gallium/state_trackers/va/va_buffer.c
@@ -27,70 +27,62 @@
 
 #include <va/va.h>
 #include <va/va_backend.h>
-#include "va_private.h"
 
+#include "va_private.h"
 
-VAStatus vlVaCreateBuffer(		VADriverContextP ctx,
-                                VAContextID context,
-                                VABufferType type,
-                                unsigned int size,
-                                unsigned int num_elements,
-                                void *data,
-                                VABufferID *buf_id)
+VAStatus
+vlVaCreateBuffer(VADriverContextP ctx, VAContextID context, VABufferType type,
+                 unsigned int size, unsigned int num_elements, void *data,
+                 VABufferID *buf_id)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaBufferSetNumElements(	VADriverContextP ctx,
-                                    VABufferID buf_id,
-                                    unsigned int num_elements)
+VAStatus
+vlVaBufferSetNumElements(VADriverContextP ctx, VABufferID buf_id, unsigned int num_elements)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaMapBuffer(			VADriverContextP ctx,
-                                VABufferID buf_id,
-                                void **pbuff)
+VAStatus
+vlVaMapBuffer(VADriverContextP ctx, VABufferID buf_id, void **pbuff)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaUnmapBuffer(		VADriverContextP ctx,
-                                VABufferID buf_id)
+VAStatus
+vlVaUnmapBuffer(VADriverContextP ctx, VABufferID buf_id)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaDestroyBuffer(		VADriverContextP ctx,
-                                    VABufferID buffer_id)
+VAStatus
+vlVaDestroyBuffer(VADriverContextP ctx, VABufferID buffer_id)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaBufferInfo(		VADriverContextP ctx,
-                                VAContextID context,
-                                VABufferID buf_id,
-                                VABufferType *type,
-                                unsigned int *size,
-                                unsigned int *num_elements)
+VAStatus
+vlVaBufferInfo(VADriverContextP ctx, VAContextID context, VABufferID buf_id,
+               VABufferType *type, unsigned int *size, unsigned int *num_elements)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
diff --git a/src/gallium/state_trackers/va/va_config.c b/src/gallium/state_trackers/va/va_config.c
index 1589abf7cfa..6f1cb78bb53 100644
--- a/src/gallium/state_trackers/va/va_config.c
+++ b/src/gallium/state_trackers/va/va_config.c
@@ -27,105 +27,95 @@
 
 #include <va/va.h>
 #include <va/va_backend.h>
+
 #include <util/u_debug.h>
+
 #include "va_private.h"
 
-VAStatus vlVaQueryConfigProfiles(       VADriverContextP ctx,
-                                   VAProfile *profile_list,
-                                   int *num_profiles)
+VAStatus
+vlVaQueryConfigProfiles(VADriverContextP ctx, VAProfile *profile_list, int *num_profiles)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	int i = 0;
+   int i = 0;
 
-    profile_list[i++] = VAProfileMPEG2Simple;
-	*num_profiles = i;
+   profile_list[i++] = VAProfileMPEG2Simple;
+   *num_profiles = i;
 
-	return VA_STATUS_SUCCESS;
+   return VA_STATUS_SUCCESS;
 }
 
 
-VAStatus vlVaQueryConfigEntrypoints(       VADriverContextP ctx,
-                                      VAProfile profile,
-                                      VAEntrypoint *entrypoint_list,
-                                      int *num_entrypoints)
+VAStatus
+vlVaQueryConfigEntrypoints(VADriverContextP ctx, VAProfile profile,
+                           VAEntrypoint *entrypoint_list, int *num_entrypoints)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 		
-	VAStatus vaStatus = VA_STATUS_SUCCESS;
-
-    switch (profile) {
-    case VAProfileMPEG2Simple:
-    case VAProfileMPEG2Main:
-		VA_INFO("Using profile %08x\n",profile);
-        *num_entrypoints = 1;
-        entrypoint_list[0] = VAEntrypointMoComp;
-        break;
-
-    case VAProfileH264Baseline:
-    case VAProfileH264Main:
-    case VAProfileH264High:
-        vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
-        *num_entrypoints = 0;
-        break;
-
-    default:
-		VA_ERROR("Unsupported profile %08x\n",profile);
-        vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
-        *num_entrypoints = 0;
-        break;
-    }
-
-    return vaStatus;
+   VAStatus vaStatus = VA_STATUS_SUCCESS;
+
+   switch (profile) {
+   case VAProfileMPEG2Simple:
+   case VAProfileMPEG2Main:
+      VA_INFO("Using profile %08x\n",profile);
+      entrypoint_list[0] = VAEntrypointMoComp;
+      *num_entrypoints = 1;
+      break;
+
+   case VAProfileH264Baseline:
+   case VAProfileH264Main:
+   case VAProfileH264High:
+      vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+      *num_entrypoints = 0;
+      break;
+
+   default:
+      VA_ERROR("Unsupported profile %08x\n",profile);
+      vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+      *num_entrypoints = 0;
+      break;
+   }
+
+   return vaStatus;
 }
 
-
-VAStatus vlVaGetConfigAttributes(       VADriverContextP ctx,
-                                        VAProfile profile,
-                                        VAEntrypoint entrypoint,
-                                        VAConfigAttrib *attrib_list,
-                                        int num_attribs)
+VAStatus
+vlVaGetConfigAttributes(VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint,
+                        VAConfigAttrib *attrib_list, int num_attribs)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaCreateConfig(       VADriverContextP ctx,
-                                 VAProfile profile,
-                                 VAEntrypoint entrypoint,
-                                 VAConfigAttrib *attrib_list,
-                                 int num_attribs,
-                                 VAConfigID *config_id)
+VAStatus
+vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, VAEntrypoint entrypoint,
+                 VAConfigAttrib *attrib_list, int num_attribs, VAConfigID *config_id)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaDestroyConfig(       VADriverContextP ctx,
-                                  VAConfigID config_id)
+VAStatus
+vlVaDestroyConfig(VADriverContextP ctx, VAConfigID config_id)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaQueryConfigAttributes(       VADriverContextP ctx,
-        VAConfigID config_id,
-        VAProfile *profile,
-        VAEntrypoint *entrypoint,
-        VAConfigAttrib *attrib_list,
-        int *num_attribs)
+VAStatus
+vlVaQueryConfigAttributes(VADriverContextP ctx, VAConfigID config_id, VAProfile *profile,
+                          VAEntrypoint *entrypoint, VAConfigAttrib *attrib_list, int *num_attribs)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
-
diff --git a/src/gallium/state_trackers/va/va_context.c b/src/gallium/state_trackers/va/va_context.c
index cdb20cc0eb2..62ba3d24ae3 100644
--- a/src/gallium/state_trackers/va/va_context.c
+++ b/src/gallium/state_trackers/va/va_context.c
@@ -25,83 +25,82 @@
  *
  **************************************************************************/
 
-#include <pipe/p_compiler.h>
-#include <pipe/p_video_context.h>
+#include <va/va.h>
+#include <va/va_backend.h>
+
 #include <pipe/p_screen.h>
-#include <vl_winsys.h>
+#include <pipe/p_screen.h>
+#include <pipe/p_video_decoder.h>
+
 #include <util/u_debug.h>
 #include <util/u_memory.h>
-#include <va/va.h>
-#include <va/va_backend.h>
-#include "va_private.h"
 
-//struct VADriverVTable vlVaGetVtable();
+#include <vl_winsys.h>
+
+#include "va_private.h"
 
-PUBLIC
-VAStatus __vaDriverInit_0_31 (VADriverContextP ctx)
+PUBLIC VAStatus
+__vaDriverInit_0_31(VADriverContextP ctx)
 {
-	vlVaDriverContextPriv *driver_context = NULL;
+   vlVaDriverContextPriv *driver_context = NULL;
 	
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 		
 		
-	/* Create private driver context */
-	driver_context = CALLOC(1,sizeof(vlVaDriverContextPriv));
-	if (!driver_context)
-		return VA_STATUS_ERROR_ALLOCATION_FAILED;
+   /* Create private driver context */
+   driver_context = CALLOC(1,sizeof(vlVaDriverContextPriv));
+   if (!driver_context)
+      return VA_STATUS_ERROR_ALLOCATION_FAILED;
 		
-    driver_context->vscreen = vl_screen_create(ctx->native_dpy, ctx->x11_screen);
-	if (!driver_context->vscreen)
-	{
-		FREE(driver_context);
-		return VA_STATUS_ERROR_ALLOCATION_FAILED;
-	}
+   driver_context->vscreen = vl_screen_create(ctx->native_dpy, ctx->x11_screen);
+   if (!driver_context->vscreen) {
+      FREE(driver_context);
+      return VA_STATUS_ERROR_ALLOCATION_FAILED;
+   }
 		
-	ctx->str_vendor = "mesa gallium vaapi";
-	ctx->vtable = vlVaGetVtable();
-	ctx->max_attributes = 1;
-	ctx->max_display_attributes = 1;
-	ctx->max_entrypoints = VA_MAX_ENTRYPOINTS;
-	ctx->max_image_formats = VA_MAX_IMAGE_FORMATS_SUPPORTED;
-	ctx->max_profiles = 1;
-	ctx->max_subpic_formats = VA_MAX_SUBPIC_FORMATS_SUPPORTED;
-	ctx->version_major = 3;
-	ctx->version_minor = 1;
-	ctx->pDriverData = (void *)driver_context;
+   ctx->str_vendor = "mesa gallium vaapi";
+   ctx->vtable = vlVaGetVtable();
+   ctx->max_attributes = 1;
+   ctx->max_display_attributes = 1;
+   ctx->max_entrypoints = VA_MAX_ENTRYPOINTS;
+   ctx->max_image_formats = VA_MAX_IMAGE_FORMATS_SUPPORTED;
+   ctx->max_profiles = 1;
+   ctx->max_subpic_formats = VA_MAX_SUBPIC_FORMATS_SUPPORTED;
+   ctx->version_major = 3;
+   ctx->version_minor = 1;
+   ctx->pDriverData = (void *)driver_context;
 
-	VA_INFO("vl_screen_pointer %p\n",ctx->native_dpy);
+   VA_INFO("vl_screen_pointer %p\n",ctx->native_dpy);
 
-	return VA_STATUS_SUCCESS;
+   return VA_STATUS_SUCCESS;
 }
 
-VAStatus vlVaCreateContext(       VADriverContextP ctx,
-                                  VAConfigID config_id,
-                                  int picture_width,
-                                  int picture_height,
-                                  int flag,
-                                  VASurfaceID *render_targets,
-                                  int num_render_targets,
-                                  VAContextID *conext)
+VAStatus
+vlVaCreateContext(VADriverContextP ctx, VAConfigID config_id, int picture_width,
+                  int picture_height, int flag, VASurfaceID *render_targets,
+                  int num_render_targets, VAContextID *context)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED; /* stub: no context is created yet */
 }
 
-VAStatus vlVaDestroyContext(       VADriverContextP ctx,
-                                   VAContextID context)
+VAStatus
+vlVaDestroyContext(VADriverContextP ctx, VAContextID context)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaTerminate(       VADriverContextP ctx)
+VAStatus
+vlVaTerminate(VADriverContextP ctx)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
diff --git a/src/gallium/state_trackers/va/va_display.c b/src/gallium/state_trackers/va/va_display.c
index 1aaaf7ccc53..11116d0c00a 100644
--- a/src/gallium/state_trackers/va/va_display.c
+++ b/src/gallium/state_trackers/va/va_display.c
@@ -27,44 +27,37 @@
 
  #include <va/va.h>
  #include <va/va_backend.h>
+ 
  #include "va_private.h"
 
-
-VAStatus  vlVaQueryDisplayAttributes(		VADriverContextP ctx,
-								VADisplayAttribute *attr_list,
-								int *num_attributes)
+VAStatus
+vlVaQueryDisplayAttributes(VADriverContextP ctx, VADisplayAttribute *attr_list, int *num_attributes)
 {
-        if (!ctx)
-                return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-		if (!(attr_list && num_attributes))
-			return VA_STATUS_ERROR_UNKNOWN;
+   if (!(attr_list && num_attributes))
+      return VA_STATUS_ERROR_UNKNOWN;
 
-        *num_attributes = 0;
+   *num_attributes = 0;
 
-		return VA_STATUS_SUCCESS;
+   return VA_STATUS_SUCCESS;
 }
 
-VAStatus  vlVaGetDisplayAttributes(		VADriverContextP ctx,
-								VADisplayAttribute *attr_list,
-								int num_attributes)
+VAStatus
+vlVaGetDisplayAttributes(VADriverContextP ctx, VADisplayAttribute *attr_list, int num_attributes)
 {
-        if (!ctx)
-                return VA_STATUS_ERROR_INVALID_CONTEXT;
-
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-        return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus  vlVaSetDisplayAttributes(		VADriverContextP ctx,
-								VADisplayAttribute *attr_list,
-								int num_attributes)
+VAStatus
+vlVaSetDisplayAttributes(VADriverContextP ctx, VADisplayAttribute *attr_list, int num_attributes)
 {
-        if (!ctx)
-                return VA_STATUS_ERROR_INVALID_CONTEXT;
-
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-        return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
-
-
diff --git a/src/gallium/state_trackers/va/va_image.c b/src/gallium/state_trackers/va/va_image.c
index 8d20bfa9174..43c3a946737 100644
--- a/src/gallium/state_trackers/va/va_image.c
+++ b/src/gallium/state_trackers/va/va_image.c
@@ -25,154 +25,127 @@
  *
  **************************************************************************/
 
+#include <va/va.h>
+#include <va/va_backend.h>
+
+#include <pipe/p_format.h>
+
 #include <util/u_memory.h>
 #include <util/u_format.h>
 #include <util/u_debug.h>
-#include <pipe/p_format.h>
-#include <va/va.h>
-#include <va/va_backend.h>
+
 #include "va_private.h"
 
 typedef struct  {
-	enum pipe_format pipe_format;
-	VAImageFormat       va_format;
+   enum pipe_format pipe_format;
+   VAImageFormat       va_format;
 } va_image_formats_supported_t;
 
 static const va_image_formats_supported_t va_image_formats_supported[VA_MAX_IMAGE_FORMATS_SUPPORTED] = 
 {
-	{ PIPE_FORMAT_B8G8R8A8_UNORM,
+   { PIPE_FORMAT_B8G8R8A8_UNORM,
       { VA_FOURCC('B','G','R','A'), VA_LSB_FIRST, 32, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 }},
-    { PIPE_FORMAT_R8G8B8A8_UNORM, 
-	  { VA_FOURCC_RGBA, VA_LSB_FIRST, 32, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 }}
+   { PIPE_FORMAT_R8G8B8A8_UNORM, 
+      { VA_FOURCC_RGBA, VA_LSB_FIRST, 32, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 }}
 };
 
-boolean vlCreateHTAB(void);
-void vlDestroyHTAB(void);
-vlHandle vlAddDataHTAB(void *data);
-void* vlGetDataHTAB(vlHandle handle);
-
 VAStatus
-vlVaQueryImageFormats ( 	VADriverContextP ctx,
-                            VAImageFormat *format_list,
-                            int *num_formats)
+vlVaQueryImageFormats(VADriverContextP ctx, VAImageFormat *format_list, int *num_formats)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	if (!(format_list && num_formats))
-		return VA_STATUS_ERROR_UNKNOWN;
+   if (!(format_list && num_formats))
+      return VA_STATUS_ERROR_UNKNOWN;
 		
-	int n = 0;
+   int n = 0;
 	
-	num_formats[0] = VA_MAX_IMAGE_FORMATS_SUPPORTED;
+   num_formats[0] = VA_MAX_IMAGE_FORMATS_SUPPORTED;
 	
-	/* Query supported formats */
-	for (n = 0; n < VA_MAX_IMAGE_FORMATS_SUPPORTED; n++)
-	{
-		format_list[n] = va_image_formats_supported[n].va_format;
-	}
+   /* Query supported formats */
+   for (n = 0; n < VA_MAX_IMAGE_FORMATS_SUPPORTED; n++) {
+      format_list[n] = va_image_formats_supported[n].va_format;
+   }
 
-	return VA_STATUS_SUCCESS;
+   return VA_STATUS_SUCCESS;
 }
 
-VAStatus vlVaCreateImage(	VADriverContextP ctx,
-                            VAImageFormat *format,
-                            int width,
-                            int height,
-                            VAImage *image)
+VAStatus
+vlVaCreateImage(VADriverContextP ctx, VAImageFormat *format, int width, int height, VAImage *image)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	if(!format)
-		return VA_STATUS_ERROR_UNKNOWN;
+   if(!format)
+      return VA_STATUS_ERROR_UNKNOWN;
 		
-	if (!(width && height))
-		return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
+   if (!(width && height))
+      return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
 		
-	if (!vlCreateHTAB())
-		return VA_STATUS_ERROR_UNKNOWN; 
+   if (!vlCreateHTAB())
+      return VA_STATUS_ERROR_UNKNOWN; 
 		
-	switch (format->fourcc) {
-	case VA_FOURCC('B','G','R','A'):
-		VA_INFO("Creating BGRA image of size %dx%d\n",width,height);
-	break;
-	case VA_FOURCC_RGBA:
-		VA_INFO("Creating RGBA image of size %dx%d\n",width,height);
-	break;
-	default:
-		VA_ERROR("Couldn't create image of type %0x08\n",format->fourcc);
-		return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
-	break;
-	}
+   switch (format->fourcc) {
+   case VA_FOURCC('B','G','R','A'):
+      VA_INFO("Creating BGRA image of size %dx%d\n",width,height);
+      break;
+   case VA_FOURCC_RGBA:
+      VA_INFO("Creating RGBA image of size %dx%d\n",width,height);
+      break;
+   default: /* unsupported fourcc: log it as zero-padded 8-digit hex */
+      VA_ERROR("Couldn't create image of type %08x\n",format->fourcc);
+      return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
+   }
 		
-	VA_INFO("Image %p created successfully\n",format);
+   VA_INFO("Image %p created successfully\n",format);
 	
-	return VA_STATUS_SUCCESS;
+   return VA_STATUS_SUCCESS;
 }
 
-VAStatus vlVaDeriveImage(	VADriverContextP ctx,
-                            VASurfaceID surface,
-                            VAImage *image)
+VAStatus
+vlVaDeriveImage(VADriverContextP ctx, VASurfaceID surface, VAImage *image)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
-
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaDestroyImage(	VADriverContextP ctx,
-                            VAImageID image)
+VAStatus
+vlVaDestroyImage(VADriverContextP ctx, VAImageID image)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaSetImagePalette(	VADriverContextP ctx,
-                            VAImageID image,
-                            unsigned char *palette)
+VAStatus
+vlVaSetImagePalette(VADriverContextP ctx, VAImageID image, unsigned char *palette)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaGetImage(		VADriverContextP ctx,
-                            VASurfaceID surface,
-                            int x,
-                            int y,
-                            unsigned int width,
-                            unsigned int height,
-                            VAImageID image)
+VAStatus
+vlVaGetImage(VADriverContextP ctx, VASurfaceID surface, int x, int y,
+             unsigned int width, unsigned int height, VAImageID image)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
-
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaPutImage(		VADriverContextP ctx,
-                            VASurfaceID surface,
-                            VAImageID image,
-                            int src_x,
-                            int src_y,
-                            unsigned int src_width,
-                            unsigned int src_height,
-                            int dest_x,
-                            int dest_y,
-                            unsigned int dest_width,
-                            unsigned int dest_height)
+VAStatus
+vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, VAImageID image,
+             int src_x, int src_y, unsigned int src_width, unsigned int src_height,
+             int dest_x, int dest_y, unsigned int dest_width, unsigned int dest_height)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
-
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
diff --git a/src/gallium/state_trackers/va/va_picture.c b/src/gallium/state_trackers/va/va_picture.c
index 3603dfb6fed..3b52a6a7e10 100644
--- a/src/gallium/state_trackers/va/va_picture.c
+++ b/src/gallium/state_trackers/va/va_picture.c
@@ -27,35 +27,34 @@
 
 #include <va/va.h>
 #include <va/va_backend.h>
+
 #include <util/u_debug.h>
+
 #include "va_private.h"
 
-VAStatus vlVaBeginPicture(       VADriverContextP ctx,
-                                 VAContextID context,
-                                 VASurfaceID render_target)
+VAStatus
+vlVaBeginPicture(VADriverContextP ctx, VAContextID context, VASurfaceID render_target)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaRenderPicture(       VADriverContextP ctx,
-                                  VAContextID context,
-                                  VABufferID *buffers,
-                                  int num_buffers)
+VAStatus
+vlVaRenderPicture(VADriverContextP ctx, VAContextID context, VABufferID *buffers, int num_buffers)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaEndPicture(       VADriverContextP ctx,
-                               VAContextID context)
+VAStatus
+vlVaEndPicture(VADriverContextP ctx, VAContextID context)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h
index 625c6cdbe1b..f1023dbf87b 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -30,6 +30,7 @@
 
 #include <va/va.h>
 #include <va/va_backend.h>
+
 #include <pipe/p_format.h>
 #include <pipe/p_state.h>
 
@@ -44,18 +45,16 @@
 
 #define VL_HANDLES
 
-typedef unsigned int vlHandle;
-
 typedef struct {
-	struct vl_screen *vscreen;
-	struct pipe_surface *backbuffer;
+   struct vl_screen *vscreen;
+   struct pipe_surface *backbuffer;
 } vlVaDriverContextPriv;
 
 typedef struct {
-	unsigned int width;
-	unsigned int height;
-	enum pipe_video_chroma_format format;
-	VADriverContextP ctx;
+   unsigned int width;
+   unsigned int height;
+   enum pipe_video_chroma_format format;
+   VADriverContextP ctx;
 } vlVaSurfacePriv;
 
 // Public functions:
@@ -64,6 +63,10 @@ VAStatus __vaDriverInit_0_31 (VADriverContextP ctx);
 // Private functions:
 struct VADriverVTable vlVaGetVtable();
 
+bool vlCreateHTAB(void);
+void vlDestroyHTAB(void);
+VAGenericID vlAddDataHTAB(void *data);
+void* vlGetDataHTAB(VAGenericID handle);
 
 // Vtable functions:
 VAStatus vlVaTerminate (VADriverContextP ctx);
diff --git a/src/gallium/state_trackers/va/va_subpicture.c b/src/gallium/state_trackers/va/va_subpicture.c
index 910e5bd7b70..3f370e5889f 100644
--- a/src/gallium/state_trackers/va/va_subpicture.c
+++ b/src/gallium/state_trackers/va/va_subpicture.c
@@ -27,131 +27,117 @@
 
 #include <va/va.h>
 #include <va/va_backend.h>
+
 #include <pipe/p_format.h>
-#include "va_private.h"
 
+#include "va_private.h"
 
 typedef struct  {
-	enum pipe_format	pipe_format;
-	VAImageFormat       va_format;
-    unsigned int        va_flags;
+   enum pipe_format pipe_format;
+   VAImageFormat    va_format;
+   unsigned int     va_flags;
 } va_subpicture_formats_supported_t;
 
 static const va_subpicture_formats_supported_t va_subpicture_formats_supported[VA_MAX_SUBPIC_FORMATS_SUPPORTED + 1] = 
 {
-	{ PIPE_FORMAT_B8G8R8A8_UNORM,
+   { PIPE_FORMAT_B8G8R8A8_UNORM,
       { VA_FOURCC('B','G','R','A'), VA_LSB_FIRST, 32, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 },
       0 },
-    { PIPE_FORMAT_R8G8B8A8_UNORM, 
-	  { VA_FOURCC_RGBA, VA_LSB_FIRST, 32, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 },
+   { PIPE_FORMAT_R8G8B8A8_UNORM, 
+      { VA_FOURCC_RGBA, VA_LSB_FIRST, 32, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 },
       0 }
 };
 
 VAStatus
-vlVaQuerySubpictureFormats(		VADriverContextP ctx,
-                                VAImageFormat *format_list,
-                                unsigned int *flags,
-                                unsigned int *num_formats)
+vlVaQuerySubpictureFormats(VADriverContextP ctx, VAImageFormat *format_list,
+                           unsigned int *flags, unsigned int *num_formats)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 		
-	if (!(format_list && flags && num_formats))
-		return VA_STATUS_ERROR_UNKNOWN;
+   if (!(format_list && flags && num_formats))
+      return VA_STATUS_ERROR_UNKNOWN;
 		
-	num_formats[0] = VA_MAX_SUBPIC_FORMATS_SUPPORTED;
+   num_formats[0] = VA_MAX_SUBPIC_FORMATS_SUPPORTED;
 		
-	int n = 0;
-	/* Query supported formats */
-	for (n = 0; n < VA_MAX_SUBPIC_FORMATS_SUPPORTED ; n++)
-	{
-		const va_subpicture_formats_supported_t * const format_map = &va_subpicture_formats_supported[n];
-		flags[n] = format_map->va_flags;
-		format_list[n] = format_map->va_format;
-	}
-
-	return VA_STATUS_SUCCESS;
+   int n = 0;
+   /* Query supported formats */
+   for (n = 0; n < VA_MAX_SUBPIC_FORMATS_SUPPORTED ; n++) {
+      const va_subpicture_formats_supported_t * const format_map = &va_subpicture_formats_supported[n];
+      flags[n] = format_map->va_flags;
+      format_list[n] = format_map->va_format;
+   }
+
+   return VA_STATUS_SUCCESS;
 }
 
-
-VAStatus vlVaCreateSubpicture(		VADriverContextP ctx,
-                                    VAImageID image,
-                                    VASubpictureID *subpicture)
+VAStatus
+vlVaCreateSubpicture(VADriverContextP ctx, VAImageID image, VASubpictureID *subpicture)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaDestroySubpicture(		VADriverContextP ctx,
-                                    VASubpictureID subpicture)
+VAStatus
+vlVaDestroySubpicture(VADriverContextP ctx, VASubpictureID subpicture)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaSubpictureImage(		VADriverContextP ctx,
-                                    VASubpictureID subpicture,
-                                    VAImageID image)
+VAStatus
+vlVaSubpictureImage(VADriverContextP ctx, VASubpictureID subpicture, VAImageID image)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaSetSubpictureChromakey(	VADriverContextP ctx,
-                                        VASubpictureID subpicture,
-                                        unsigned int chromakey_min,
-                                        unsigned int chromakey_max,
-                                        unsigned int chromakey_mask)
+VAStatus
+vlVaSetSubpictureChromakey(VADriverContextP ctx, VASubpictureID subpicture,
+                           unsigned int chromakey_min, unsigned int chromakey_max, unsigned int chromakey_mask)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaSetSubpictureGlobalAlpha(	VADriverContextP ctx,
-                                        VASubpictureID subpicture,
-                                        float global_alpha)
+VAStatus
+vlVaSetSubpictureGlobalAlpha(VADriverContextP ctx, VASubpictureID subpicture, float global_alpha)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaAssociateSubpicture(	VADriverContextP ctx,
-                                    VASubpictureID subpicture,
-                                    VASurfaceID *target_surfaces,
-                                    int num_surfaces,
-                                    short src_x,
-                                    short src_y,
-                                    unsigned short src_width,
-                                    unsigned short src_height,
-                                    short dest_x,
-                                    short dest_y,
-                                    unsigned short dest_width,
-                                    unsigned short dest_height,
-                                    unsigned int flags)
+VAStatus
+vlVaAssociateSubpicture(VADriverContextP ctx, VASubpictureID subpicture, VASurfaceID *target_surfaces,
+                        int num_surfaces, short src_x, short src_y,
+                        unsigned short src_width, unsigned short src_height,
+                        short dest_x, short dest_y,
+                        unsigned short dest_width,
+                        unsigned short dest_height,
+                        unsigned int flags)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaDeassociateSubpicture(	VADriverContextP ctx,
-                                    VASubpictureID subpicture,
-                                    VASurfaceID *target_surfaces,
-                                    int num_surfaces)
+VAStatus
+vlVaDeassociateSubpicture(VADriverContextP ctx, VASubpictureID subpicture,
+                          VASurfaceID *target_surfaces, int num_surfaces)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
diff --git a/src/gallium/state_trackers/va/va_surface.c b/src/gallium/state_trackers/va/va_surface.c
index a86c806248a..b7f9732d248 100644
--- a/src/gallium/state_trackers/va/va_surface.c
+++ b/src/gallium/state_trackers/va/va_surface.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
-	* Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
+ * Copyright 2010 Thomas Balling Sørensen & Orasanu Lucian.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -31,137 +31,112 @@
 #include <util/u_memory.h>
 #include "va_private.h"
 
-boolean vlCreateHTAB(void);
-void vlDestroyHTAB(void);
-vlHandle vlAddDataHTAB(void *data);
-void* vlGetDataHTAB(vlHandle handle);
-
-static enum pipe_video_chroma_format VaRTFormatToPipe(unsigned int va_type)
+static enum pipe_video_chroma_format
+VaRTFormatToPipe(unsigned int va_type)
 {
    switch (va_type) {
-      case VA_RT_FORMAT_YUV420:
-         return PIPE_VIDEO_CHROMA_FORMAT_420;
-      case VA_RT_FORMAT_YUV422:
-         return PIPE_VIDEO_CHROMA_FORMAT_422;
-      case VA_RT_FORMAT_YUV444:
-         return PIPE_VIDEO_CHROMA_FORMAT_444;
-      default:
-         assert(0);
+   case VA_RT_FORMAT_YUV420:
+      return PIPE_VIDEO_CHROMA_FORMAT_420;
+   case VA_RT_FORMAT_YUV422:
+      return PIPE_VIDEO_CHROMA_FORMAT_422;
+   case VA_RT_FORMAT_YUV444:
+      return PIPE_VIDEO_CHROMA_FORMAT_444;
+   default:
+      assert(0);
    }
 
    return -1;
 }
 
-VAStatus vlVaCreateSurfaces(       VADriverContextP ctx,
-                                   int width,
-                                   int height,
-                                   int format,
-                                   int num_surfaces,
-                                   VASurfaceID *surfaces)
+VAStatus
+vlVaCreateSurfaces(VADriverContextP ctx, int width, int height, int format,
+                   int num_surfaces, VASurfaceID *surfaces)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-    /* We only support one format */
-    if (VA_RT_FORMAT_YUV420 != format)
-        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
+   /* We only support one format */
+   if (VA_RT_FORMAT_YUV420 != format)
+      return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
 		
-	if (!(width && height))
-		return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
+   if (!(width && height))
+      return VA_STATUS_ERROR_INVALID_IMAGE_FORMAT;
 		
-	if (!vlCreateHTAB())
-		return VA_STATUS_ERROR_UNKNOWN; 
+   if (!vlCreateHTAB())
+      return VA_STATUS_ERROR_UNKNOWN; 
 		
-	vlVaSurfacePriv *va_surface = (vlVaSurfacePriv *)CALLOC(num_surfaces,sizeof(vlVaSurfacePriv));
-	if (!va_surface)
-		return VA_STATUS_ERROR_ALLOCATION_FAILED;
+   vlVaSurfacePriv *va_surface = (vlVaSurfacePriv *)CALLOC(num_surfaces,sizeof(vlVaSurfacePriv));
+   if (!va_surface)
+      return VA_STATUS_ERROR_ALLOCATION_FAILED;
 		
-	int n = 0;
-	for (n = 0; n < num_surfaces; n++)
-	{
-		va_surface[n].width = width;
-		va_surface[n].height = height;
-		va_surface[n].format = VaRTFormatToPipe(format);
-		va_surface[n].ctx = ctx;
-		surfaces[n] = (VASurfaceID *)vlAddDataHTAB((void *)(va_surface + n));
-	}
-
-	return VA_STATUS_SUCCESS;
+   int n = 0;
+   for (n = 0; n < num_surfaces; n++) {
+      va_surface[n].width = width;
+      va_surface[n].height = height;
+      va_surface[n].format = VaRTFormatToPipe(format);
+      va_surface[n].ctx = ctx;
+      surfaces[n] = vlAddDataHTAB((void *)(va_surface + n));
+   }
+
+   return VA_STATUS_SUCCESS;
 }
 
-VAStatus vlVaDestroySurfaces(       VADriverContextP ctx,
-                                    VASurfaceID *surface_list,
-                                    int num_surfaces)
+VAStatus
+vlVaDestroySurfaces(VADriverContextP ctx, VASurfaceID *surface_list, int num_surfaces)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaSyncSurface(       VADriverContextP ctx,
-                                VASurfaceID render_target)
+VAStatus
+vlVaSyncSurface(VADriverContextP ctx, VASurfaceID render_target)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaQuerySurfaceStatus(       VADriverContextP ctx,
-                                       VASurfaceID render_target,
-                                       VASurfaceStatus *status)
+VAStatus
+vlVaQuerySurfaceStatus(VADriverContextP ctx, VASurfaceID render_target, VASurfaceStatus *status)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaPutSurface(       VADriverContextP ctx,
-                               VASurfaceID surface,
-                               void* draw,
-                               short srcx,
-                               short srcy,
-                               unsigned short srcw,
-                               unsigned short srch,
-                               short destx,
-                               short desty,
-                               unsigned short destw,
-                               unsigned short desth,
-                               VARectangle *cliprects,
-                               unsigned int number_cliprects,
-                               unsigned int flags)
+VAStatus
+vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface, void* draw, short srcx, short srcy,
+               unsigned short srcw, unsigned short srch, short destx, short desty,
+               unsigned short destw, unsigned short desth, VARectangle *cliprects,
+               unsigned int number_cliprects,  unsigned int flags)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaLockSurface(	VADriverContextP ctx,
-                            VASurfaceID surface,
-                            unsigned int *fourcc,
-                            unsigned int *luma_stride,
-                            unsigned int *chroma_u_stride,
-                            unsigned int *chroma_v_stride,
-                            unsigned int *luma_offset,
-                            unsigned int *chroma_u_offset,
-                            unsigned int *chroma_v_offset,
-                            unsigned int *buffer_name,
-                            void **buffer)
+VAStatus
+vlVaLockSurface(VADriverContextP ctx, VASurfaceID surface, unsigned int *fourcc,
+                unsigned int *luma_stride, unsigned int *chroma_u_stride, unsigned int *chroma_v_stride,
+                unsigned int *luma_offset, unsigned int *chroma_u_offset, unsigned int *chroma_v_offset,
+                unsigned int *buffer_name, void **buffer)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
 
-VAStatus vlVaUnlockSurface(	VADriverContextP ctx,
-                            VASurfaceID surface)
+VAStatus
+vlVaUnlockSurface(VADriverContextP ctx, VASurfaceID surface)
 {
-	if (!ctx)
-		return VA_STATUS_ERROR_INVALID_CONTEXT;
+   if (!ctx)
+      return VA_STATUS_ERROR_INVALID_CONTEXT;
 
-	return VA_STATUS_ERROR_UNIMPLEMENTED;
+   return VA_STATUS_ERROR_UNIMPLEMENTED;
 }
-- 
cgit v1.2.3


From 7c48575402e8c384db2fab24e4dd0fc72bef0451 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 13 Jul 2011 15:01:40 +0200
Subject: [g3dvl] keep a pointer in idct buffer to idct object

So we always know to which idct object a buffer belongs
---
 src/gallium/auxiliary/vl/vl_idct.c           | 25 ++++++++++++++-----------
 src/gallium/auxiliary/vl/vl_idct.h           |  8 +++++---
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 13 +++++++------
 3 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 744a35603d7..75e76c09f63 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -791,6 +791,8 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
 
    memset(buffer, 0, sizeof(struct vl_idct_buffer));
 
+   buffer->idct = idct;
+
    pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, idct->matrix);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.source, source);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
@@ -806,22 +808,24 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
 }
 
 void
-vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer)
 {
-   assert(idct && buffer);
+   assert(buffer);
 
-   cleanup_source(idct, buffer);
-   cleanup_intermediate(idct, buffer);
+   cleanup_source(buffer->idct, buffer);
+   cleanup_intermediate(buffer->idct, buffer);
 
    pipe_sampler_view_reference(&buffer->sampler_views.individual.matrix, NULL);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, NULL);
 }
 
 void
-vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_instances)
+vl_idct_flush(struct vl_idct_buffer *buffer, unsigned num_instances)
 {
-   assert(idct);
+   struct vl_idct *idct;
    assert(buffer);
+   
+   idct = buffer->idct;
 
    idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
    idct->pipe->bind_blend_state(idct->pipe, idct->blend);
@@ -844,14 +848,13 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
 }
 
 void
-vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+vl_idct_prepare_stage2(struct vl_idct_buffer *buffer)
 {
-   assert(idct);
    assert(buffer);
 
    /* second stage */
-   idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
-   idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
-   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[1]);
+   buffer->idct->pipe->bind_rasterizer_state(buffer->idct->pipe, buffer->idct->rs_state);
+   buffer->idct->pipe->bind_fragment_sampler_states(buffer->idct->pipe, 2, buffer->idct->samplers);
+   buffer->idct->pipe->set_fragment_sampler_views(buffer->idct->pipe, 2, buffer->sampler_views.stage[1]);
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index 119a53dbf27..98e2c795564 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -58,6 +58,8 @@ struct vl_idct
 /* a set of buffers to work with */
 struct vl_idct_buffer
 {
+   struct vl_idct *idct;
+   
    struct pipe_viewport_state viewport_mismatch;
    struct pipe_viewport_state viewport;
 
@@ -107,13 +109,13 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
 
 /* cleanup a buffer of an idct instance */
 void
-vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer);
+vl_idct_cleanup_buffer(struct vl_idct_buffer *buffer);
 
 /* flush the buffer and start rendering, vertex buffers needs to be setup before calling this */
 void
-vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_verts);
+vl_idct_flush(struct vl_idct_buffer *buffer, unsigned num_verts);
 
 void
-vl_idct_prepare_stage2(struct vl_idct *idct, struct vl_idct_buffer *buffer);
+vl_idct_prepare_stage2(struct vl_idct_buffer *buffer);
 
 #endif
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 7bb5a695e40..98b0adabb2b 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -177,7 +177,7 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 
 error_plane:
    for (; i > 0; --i)
-      vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
+      vl_idct_cleanup_buffer(&buffer->idct[i - 1]);
 
 error_mc_source_sv:
 error_source_sv:
@@ -188,14 +188,15 @@ static void
 cleanup_idct_buffer(struct vl_mpeg12_buffer *buf)
 {
    struct vl_mpeg12_decoder *dec;
+   unsigned i;
+   
    assert(buf);
 
    dec = (struct vl_mpeg12_decoder*)buf->base.decoder;
    assert(dec);
 
-   vl_idct_cleanup_buffer(&dec->idct_y, &buf->idct[0]);
-   vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[1]);
-   vl_idct_cleanup_buffer(&dec->idct_c, &buf->idct[2]);
+   for (i = 0; i < 3; ++i) /* release idct[0..2], exactly the buffers the replaced calls cleaned */
+      vl_idct_cleanup_buffer(&buf->idct[i]);
 }
 
 static bool
@@ -574,7 +575,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
       vl_zscan_render(&buf->zscan[i] , num_ycbcr_blocks[i]);
 
       if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-         vl_idct_flush(i == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[i], num_ycbcr_blocks[i]);
+         vl_idct_flush(&buf->idct[i], num_ycbcr_blocks[i]);
    }
 
    mc_source_sv = dec->mc_source->get_sampler_view_planes(dec->mc_source);
@@ -589,7 +590,7 @@ vl_mpeg12_decoder_flush_buffer(struct pipe_video_decode_buffer *buffer,
          dec->base.context->set_vertex_buffers(dec->base.context, 3, vb);
 
          if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT)
-            vl_idct_prepare_stage2(component == 0 ? &dec->idct_y : &dec->idct_c, &buf->idct[component]);
+            vl_idct_prepare_stage2(&buf->idct[component]);
          else {
             dec->base.context->set_fragment_sampler_views(dec->base.context, 1, &mc_source_sv[component]);
             dec->base.context->bind_fragment_sampler_states(dec->base.context, 1, &dec->sampler_ycbcr);
-- 
cgit v1.2.3


From c8dd301b6fd6bb473d8f9f16689d9689d9608520 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 13 Jul 2011 15:54:28 +0200
Subject: r600g: revert "set BLEND_CLAMP depending on clamp_fragment_color"

BLEND_CLAMP doesn't seem to be the right way to implement "ARB_color_buffer_float".
---
 src/gallium/drivers/r600/r600_state.c         | 11 +++--------
 src/gallium/winsys/r600/drm/r600_hw_context.c | 18 ++++++++----------
 2 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index f8f7c2031db..01406f2bad6 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -874,7 +874,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
 	struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
 	struct r600_pipe_rasterizer *rs = CALLOC_STRUCT(r600_pipe_rasterizer);
 	struct r600_pipe_state *rstate;
-	unsigned tmp, cb;
+	unsigned tmp;
 	unsigned prov_vtx = 1, polygon_dual_mode;
 	unsigned clip_rule;
 
@@ -949,11 +949,6 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
 	r600_pipe_state_add_reg(rstate, R_028DFC_PA_SU_POLY_OFFSET_CLAMP, 0x00000000, 0xFFFFFFFF, NULL);
 	r600_pipe_state_add_reg(rstate, R_02820C_PA_SC_CLIPRECT_RULE, clip_rule, 0xFFFFFFFF, NULL);
 
-	for (cb = 0; cb < 7; ++cb)
-		r600_pipe_state_add_reg(rstate, R_0280A0_CB_COLOR0_INFO + cb * 4,
-					S_0280A0_BLEND_CLAMP(state->clamp_fragment_color),
-					S_0280A0_BLEND_CLAMP(1), NULL);
-
 	return rstate;
 }
 
@@ -1409,6 +1404,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 	color_info = S_0280A0_FORMAT(format) |
 		S_0280A0_COMP_SWAP(swap) |
 		S_0280A0_ARRAY_MODE(rtex->array_mode[level]) |
+		S_0280A0_BLEND_CLAMP(1) |
 		S_0280A0_NUMBER_TYPE(ntype) |
 		S_0280A0_ENDIAN(endian);
 
@@ -1421,7 +1417,6 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 		 * - BLEND_CLAMP is enabled
 		 * - BLEND_FLOAT32 is disabled
 		 */
-		// TODO get BLEND_CLAMP state from rasterizer state
 		if (desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS &&
 		    (desc->channel[i].size < 12 &&
 		     desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT &&
@@ -1449,7 +1444,7 @@ static void r600_cb(struct r600_pipe_context *rctx, struct r600_pipe_state *rsta
 				(offset + r600_bo_offset(bo[0])) >> 8, 0xFFFFFFFF, bo[0]);
 	r600_pipe_state_add_reg(rstate,
 				R_0280A0_CB_COLOR0_INFO + cb * 4,
-				color_info, ~S_0280A0_BLEND_CLAMP(1), NULL);
+				color_info, 0xFFFFFFFF, bo[0]);
 	r600_pipe_state_add_reg(rstate,
 				R_028060_CB_COLOR0_SIZE + cb * 4,
 				S_028060_PITCH_TILE_MAX(pitch) |
diff --git a/src/gallium/winsys/r600/drm/r600_hw_context.c b/src/gallium/winsys/r600/drm/r600_hw_context.c
index 69f7884f823..a2f13ff0863 100644
--- a/src/gallium/winsys/r600/drm/r600_hw_context.c
+++ b/src/gallium/winsys/r600/drm/r600_hw_context.c
@@ -1054,7 +1054,7 @@ void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_stat
 		}
 		if (block->flags & REG_FLAG_DIRTY_ALWAYS)
 			dirty |= R600_BLOCK_STATUS_DIRTY;
-		if (block->pm4_bo_index[id] && state->regs[i].bo) {
+		if (block->pm4_bo_index[id]) {
 			/* find relocation */
 			reloc_id = block->pm4_bo_index[id];
 			r600_bo_reference(ctx->radeon, &block->reloc[reloc_id].bo, reg->bo);
@@ -1298,15 +1298,13 @@ void r600_context_block_emit_dirty(struct r600_context *ctx, struct r600_block *
 			if (block->pm4_bo_index[j]) {
 				/* find relocation */
 				id = block->pm4_bo_index[j];
-				if (block->reloc[id].bo) {
-					r600_context_bo_reloc(ctx,
-							      &block->pm4[block->reloc[id].bo_pm4_index],
-							      block->reloc[id].bo);
-					r600_context_bo_flush(ctx,
-							      block->reloc[id].flush_flags,
-							      block->reloc[id].flush_mask,
-							      block->reloc[id].bo);
-				}
+				r600_context_bo_reloc(ctx,
+						      &block->pm4[block->reloc[id].bo_pm4_index],
+						      block->reloc[id].bo);
+				r600_context_bo_flush(ctx,
+						      block->reloc[id].flush_flags,
+						      block->reloc[id].flush_mask,
+						      block->reloc[id].bo);
 				nbo--;
 				if (nbo == 0)
 					break;
-- 
cgit v1.2.3


From c5110a1bfab8e97b8b958d42dd294a426310d1e2 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 13 Jul 2011 16:07:30 +0200
Subject: [g3dvl] implement workaround for missing blender clamp control

It's about 20% slower, but should at least work on all hardware.
---
 src/gallium/auxiliary/vl/vl_mc.c | 39 +++++++++++++++++++++++++++++++++------
 src/gallium/auxiliary/vl/vl_mc.h |  3 ++-
 2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_mc.c b/src/gallium/auxiliary/vl/vl_mc.c
index 3b665fafb7d..bd05205b52d 100644
--- a/src/gallium/auxiliary/vl/vl_mc.c
+++ b/src/gallium/auxiliary/vl/vl_mc.c
@@ -309,7 +309,8 @@ create_ycbcr_vert_shader(struct vl_mc *r, vl_mc_ycbcr_vert_shader vs_callback, v
 }
 
 static void *
-create_ycbcr_frag_shader(struct vl_mc *r, float scale, vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv)
+create_ycbcr_frag_shader(struct vl_mc *r, float scale, bool invert,
+                         vl_mc_ycbcr_frag_shader fs_callback, void *callback_priv)
 {
    struct ureg_program *shader;
    struct ureg_src flags;
@@ -349,13 +350,14 @@ create_ycbcr_frag_shader(struct vl_mc *r, float scale, vl_mc_ycbcr_frag_shader f
       fs_callback(callback_priv, r, shader, VS_O_VTEX, tmp);
 
       if (scale != 1.0f)
-         ureg_MAD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+         ureg_MAD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
                   ureg_src(tmp), ureg_imm1f(shader, scale),
                   ureg_scalar(flags, TGSI_SWIZZLE_Z));
       else
-         ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ),
+         ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ),
                   ureg_src(tmp), ureg_scalar(flags, TGSI_SWIZZLE_Z));
-
+                  
+      ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_imm1f(shader, invert ? -1.0f : 1.0f));
       ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_imm1f(shader, 1.0f));
 
    ureg_fixup_label(shader, label, ureg_get_instruction_number(shader));
@@ -415,6 +417,12 @@ init_pipe_state(struct vl_mc *r)
       r->blend_add[i] = r->pipe->create_blend_state(r->pipe, &blend);
       if (!r->blend_add[i])
          goto error_blend;
+
+      blend.rt[0].rgb_func = PIPE_BLEND_REVERSE_SUBTRACT;
+      blend.rt[0].alpha_func = PIPE_BLEND_REVERSE_SUBTRACT; /* a blend function belongs in alpha_func, not alpha_dst_factor */
+      r->blend_sub[i] = r->pipe->create_blend_state(r->pipe, &blend);
+      if (!r->blend_sub[i])
+         goto error_blend;
    }
 
    memset(&rs_state, 0, sizeof(rs_state));
@@ -432,6 +440,9 @@ init_pipe_state(struct vl_mc *r)
 error_rs_state:
 error_blend:
    for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) {
+      if (r->blend_sub[i])
+         r->pipe->delete_blend_state(r->pipe, r->blend_sub[i]);
+
       if (r->blend_add[i])
          r->pipe->delete_blend_state(r->pipe, r->blend_add[i]);
 
@@ -456,6 +467,7 @@ cleanup_pipe_state(struct vl_mc *r)
    for (i = 0; i < VL_MC_NUM_BLENDERS; ++i) {
       r->pipe->delete_blend_state(r->pipe, r->blend_clear[i]);
       r->pipe->delete_blend_state(r->pipe, r->blend_add[i]);
+      r->pipe->delete_blend_state(r->pipe, r->blend_sub[i]);
    }
    r->pipe->delete_rasterizer_state(r->pipe, r->rs_state);
 }
@@ -493,11 +505,18 @@ vl_mc_init(struct vl_mc *renderer, struct pipe_context *pipe,
    if (!renderer->fs_ref)
       goto error_fs_ref;
 
-   renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale, fs_callback, callback_priv);
+   renderer->fs_ycbcr = create_ycbcr_frag_shader(renderer, scale, false, fs_callback, callback_priv);
    if (!renderer->fs_ycbcr)
       goto error_fs_ycbcr;
 
+   renderer->fs_ycbcr_sub = create_ycbcr_frag_shader(renderer, scale, true, fs_callback, callback_priv);
+   if (!renderer->fs_ycbcr_sub)
+      goto error_fs_ycbcr_sub;
+
    return true;
+   
+error_fs_ycbcr_sub:
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
 
 error_fs_ycbcr:
    renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
@@ -526,6 +545,7 @@ vl_mc_cleanup(struct vl_mc *renderer)
    renderer->pipe->delete_vs_state(renderer->pipe, renderer->vs_ycbcr);
    renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ref);
    renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr);
+   renderer->pipe->delete_fs_state(renderer->pipe, renderer->fs_ycbcr_sub);
 }
 
 bool
@@ -616,13 +636,14 @@ void
 vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num_instances)
 {
    struct vl_mc *renderer;
+   unsigned mask = 1 << component;
 
    assert(buffer);
 
    if (num_instances == 0)
       return;
 
-   prepare_pipe_4_rendering(buffer, 1 << component);
+   prepare_pipe_4_rendering(buffer, mask);
 
    renderer = buffer->renderer;
 
@@ -630,4 +651,10 @@ vl_mc_render_ycbcr(struct vl_mc_buffer *buffer, unsigned component, unsigned num
    renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr);
 
    util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
+   
+   if (buffer->surface_cleared) {
+      renderer->pipe->bind_blend_state(renderer->pipe, renderer->blend_sub[mask]);
+      renderer->pipe->bind_fs_state(renderer->pipe, renderer->fs_ycbcr_sub);
+      util_draw_arrays_instanced(renderer->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
+   }
 }
diff --git a/src/gallium/auxiliary/vl/vl_mc.h b/src/gallium/auxiliary/vl/vl_mc.h
index 85ec69b3ce7..9fabf02a3ac 100644
--- a/src/gallium/auxiliary/vl/vl_mc.h
+++ b/src/gallium/auxiliary/vl/vl_mc.h
@@ -51,8 +51,9 @@ struct vl_mc
 
    void *blend_clear[VL_MC_NUM_BLENDERS];
    void *blend_add[VL_MC_NUM_BLENDERS];
+   void *blend_sub[VL_MC_NUM_BLENDERS];
    void *vs_ref, *vs_ycbcr;
-   void *fs_ref, *fs_ycbcr;
+   void *fs_ref, *fs_ycbcr, *fs_ycbcr_sub;
    void *sampler_ref;
 };
 
-- 
cgit v1.2.3


From 211887c92baf25db14251ed496140207b15e0ccb Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 13 Jul 2011 17:05:58 +0200
Subject: r600g: reenable hardware blits for STATIC and IMMUTABLE resources

Getting the driver in sync with mainline.
---
 src/gallium/drivers/r600/r600_texture.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_texture.c b/src/gallium/drivers/r600/r600_texture.c
index a6161e5f281..10c32c53a6d 100644
--- a/src/gallium/drivers/r600/r600_texture.c
+++ b/src/gallium/drivers/r600/r600_texture.c
@@ -298,7 +298,7 @@ static boolean permit_hardware_blit(struct pipe_screen *screen,
 	/* hackaround for S3TC */
 	if (util_format_is_compressed(res->format))
 		return TRUE;
-
+	    
 	if (!screen->is_format_supported(screen,
 				res->format,
 				res->target,
@@ -313,16 +313,14 @@ static boolean permit_hardware_blit(struct pipe_screen *screen,
                                 PIPE_BIND_SAMPLER_VIEW))
 		return FALSE;
 
-        switch (res->usage) {
-        case PIPE_USAGE_STREAM:
-        case PIPE_USAGE_STAGING:
-        case PIPE_USAGE_STATIC:
-        case PIPE_USAGE_IMMUTABLE:
-                return FALSE;
+	switch (res->usage) {
+	case PIPE_USAGE_STREAM:
+	case PIPE_USAGE_STAGING:
+		return FALSE;
 
-        default:
-                return TRUE;
-        }
+	default:
+		return TRUE;
+	}
 }
 
 static boolean r600_texture_get_handle(struct pipe_screen* screen,
-- 
cgit v1.2.3


From a2a6799fbefc6900f2371efab778c5bc2bf5a6e9 Mon Sep 17 00:00:00 2001
From: Christian König <deathsimple@vodafone.de>
Date: Wed, 13 Jul 2011 19:37:28 +0200
Subject: [g3dvl] remove some unneeded Makefiles

---
 src/gallium/winsys/g3dvl/drm/Makefile          | 12 -----
 src/gallium/winsys/g3dvl/drm/Makefile.template | 66 --------------------------
 src/gallium/winsys/g3dvl/drm/nouveau/Makefile  | 23 ---------
 src/gallium/winsys/g3dvl/drm/radeon/Makefile   | 20 --------
 4 files changed, 121 deletions(-)
 delete mode 100644 src/gallium/winsys/g3dvl/drm/Makefile
 delete mode 100644 src/gallium/winsys/g3dvl/drm/Makefile.template
 delete mode 100644 src/gallium/winsys/g3dvl/drm/nouveau/Makefile
 delete mode 100644 src/gallium/winsys/g3dvl/drm/radeon/Makefile

diff --git a/src/gallium/winsys/g3dvl/drm/Makefile b/src/gallium/winsys/g3dvl/drm/Makefile
deleted file mode 100644
index 0711f44d8e3..00000000000
--- a/src/gallium/winsys/g3dvl/drm/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# src/gallium/winsys/Makefile
-TOP = ../../../../..
-include $(TOP)/configs/current
-
-SUBDIRS = $(GALLIUM_WINSYS_DRM_DIRS)
-
-default install clean:
-	@for dir in $(SUBDIRS) ; do \
-		if [ -d $$dir ] ; then \
-			(cd $$dir && $(MAKE) $@) || exit 1; \
-		fi \
-	done
diff --git a/src/gallium/winsys/g3dvl/drm/Makefile.template b/src/gallium/winsys/g3dvl/drm/Makefile.template
deleted file mode 100644
index 2b79deef4bc..00000000000
--- a/src/gallium/winsys/g3dvl/drm/Makefile.template
+++ /dev/null
@@ -1,66 +0,0 @@
-XVMC_MAJOR = 1
-XVMC_MINOR = 0
-XVMC_LIB = XvMCg3dvl
-XVMC_LIB_NAME = lib$(XVMC_LIB).so
-XVMC_LIB_DEPS = $(EXTRA_LIB_PATH) -lXvMC -lXv -lX11 -lm
-
-###############################################################
-
-INCLUDES = $(DRIVER_INCLUDES) \
-           -I$(TOP)/src/gallium/include \
-           -I$(TOP)/src/gallium/auxiliary \
-           -I$(TOP)/src/gallium/drivers \
-           -I$(TOP)/src/gallium/winsys/g3dvl \
-           -I$(TOP)/src/gallium/winsys/g3dvl/dri
-
-DEFINES += $(DRIVER_DEFINES) \
-           -DGALLIUM_SOFTPIPE \
-           -DGALLIUM_TRACE
-
-# XXX: Hack, if we include libxvmctracker.a in LIBS none of the symbols are
-# pulled in by the linker because xsp_winsys.c doesn't refer to them
-OBJECTS = $(C_SOURCES:.c=.o) $(TOP)/src/gallium/state_trackers/xorg/xvmc/*.o
-
-LIBS = $(TOP)/src/gallium/winsys/g3dvl/dri/libvldri.a \
-       $(PIPE_DRIVERS) \
-       $(TOP)/src/gallium/auxiliary/libgallium.a
-
-.c.o:
-	$(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
-
-.S.o:
-	$(CC) -c $(INCLUDES) $(DEFINES) $(CFLAGS) $< -o $@
-
-.PHONY: default $(TOP)/$(LIB_DIR)/gallium clean
-
-default: depend $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME)
-
-$(TOP)/$(LIB_DIR)/gallium:
-	@mkdir -p $(TOP)/$(LIB_DIR)/gallium
-
-# Make the libXvMCg3dvl.so library
-$(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME): $(OBJECTS) $(LIBS) Makefile
-	$(MKLIB) -o $(XVMC_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
-		-major $(XVMC_MAJOR) -minor $(XVMC_MINOR) $(MKLIB_OPTIONS) \
-		-install $(TOP)/$(LIB_DIR)/gallium -id $(INSTALL_LIB_DIR)/lib$(XVMC_LIB).1.dylib \
-		$(XVMC_LIB_DEPS) $(DRIVER_LIB_DEPS) $(OBJECTS) $(LIBS)
-
-depend: $(SOURCES) Makefile
-	$(RM) depend
-	touch depend
-	$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDES) $(SOURCES)
-
-#install: default
-#	$(INSTALL) -d $(INSTALL_DIR)/include/GL
-#	$(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR)
-#	$(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL
-#	@if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \
-#		$(INSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \
-#	fi
-
-clean: Makefile
-	$(RM) $(TOP)/$(LIB_DIR)/gallium/$(XVMC_LIB_NAME)
-	$(RM) *.o *~
-	$(RM) depend depend.bak
-
--include depend
diff --git a/src/gallium/winsys/g3dvl/drm/nouveau/Makefile b/src/gallium/winsys/g3dvl/drm/nouveau/Makefile
deleted file mode 100644
index 5c55186f672..00000000000
--- a/src/gallium/winsys/g3dvl/drm/nouveau/Makefile
+++ /dev/null
@@ -1,23 +0,0 @@
-# This makefile produces a libXvMCg3dvl.so which is
-# based on DRM/DRI
-
-TOP = ../../../../../..
-include $(TOP)/configs/current
-
-C_SOURCES =
-
-DRIVER_INCLUDES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-I) \
-                   -I$(TOP)/src/gallium/winsys/drm/nouveau
-DRIVER_DEFINES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-other)
-
-PIPE_DRIVERS = \
-	$(TOP)/src/gallium/winsys/drm/nouveau/drm/libnouveaudrm.a \
-	$(TOP)/src/gallium/drivers/nv30/libnv30.a \
-	$(TOP)/src/gallium/drivers/nv40/libnv40.a \
-	$(TOP)/src/gallium/drivers/nv50/libnv50.a \
-	$(TOP)/src/gallium/drivers/nouveau/libnouveau.a \
-        $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a
-
-DRIVER_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs)
-
-include ../Makefile.template
diff --git a/src/gallium/winsys/g3dvl/drm/radeon/Makefile b/src/gallium/winsys/g3dvl/drm/radeon/Makefile
deleted file mode 100644
index 0f7fd1c15ad..00000000000
--- a/src/gallium/winsys/g3dvl/drm/radeon/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# This makefile produces a libXvMCg3dvl.so which is
-# based on DRM/DRI
-
-TOP = ../../../../../..
-include $(TOP)/configs/current
-
-C_SOURCES =
-
-DRIVER_INCLUDES = $(shell pkg-config libdrm libdrm_radeon --cflags-only-I) \
-                   -I$(TOP)/src/gallium/winsys/drm/radeon \
-DRIVER_DEFINES = $(shell pkg-config libdrm libdrm_radeon --cflags-only-other)
-
-PIPE_DRIVERS = \
-	$(TOP)/src/gallium/winsys/drm/radeon/core/libradeonwinsys.a \
-	$(TOP)/src/gallium/drivers/r300/libr300.a \
-        $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a
-
-DRIVER_LIB_DEPS += $(shell pkg-config libdrm_radeon --libs)
-
-include ../Makefile.template
-- 
cgit v1.2.3