Merge remote-tracking branch 'mesa-public/master' into vulkan

author: Jason Ekstrand <[email protected]> 2015-08-14 17:25:04 -0700
committer: Jason Ekstrand <[email protected]> 2015-08-17 11:25:03 -0700
commit: 6a7ca4ef2cd3f39d3b5e77051cb3f3175e9e60df (patch)
tree: d5413781ac9e9ecfc22cf403fa7465d6a7cadb34 /src/gallium/auxiliary
parent: b4c02253c4e1a7bc5a7a6369045210932f5de605 (diff)
parent: d3e23f1ff915c01541f8df375b50b93b3da565a8 (diff)
161 files changed, 2157 insertions, 2787 deletions
diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index 89c7a13e913..04f77d002c8 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,5 +1,3 @@
-AUTOMAKE_OPTIONS = subdir-objects
-
 if HAVE_LOADER_GALLIUM
 SUBDIRS := pipe-loader
 endif
@@ -10,6 +8,7 @@ include $(top_srcdir)/src/gallium/Automake.inc
 noinst_LTLIBRARIES = libgallium.la
 
 AM_CFLAGS = \
+	-I$(top_srcdir)/src/loader \
 	-I$(top_builddir)/src/glsl/nir \
 	-I$(top_srcdir)/src/gallium/auxiliary/util \
 	$(GALLIUM_CFLAGS) \
diff --git a/src/gallium/auxiliary/Makefile.sources b/src/gallium/auxiliary/Makefile.sources
index 62e6b94cab8..3616d885b47 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -274,7 +274,6 @@ C_SOURCES := \
 	util/u_simple_shaders.h \
 	util/u_slab.c \
 	util/u_slab.h \
-	util/u_snprintf.c \
 	util/u_split_prim.h \
 	util/u_sse.h \
 	util/u_staging.c \
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index dd56e4a154e..d36f1fbd717 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -80,7 +80,7 @@ unsigned cso_construct_key(void *item, int item_size)
    return hash_key((item), item_size);
 }
 
-static INLINE struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type)
+static inline struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type)
 {
    struct cso_hash *hash;
    hash = sc->hashes[type];
@@ -127,7 +127,7 @@ static void delete_velements(void *state, void *data)
    FREE(state);
 }
 
-static INLINE void delete_cso(void *state, enum cso_cache_type type)
+static inline void delete_cso(void *state, enum cso_cache_type type)
 {
    switch (type) {
    case CSO_BLEND:
@@ -152,7 +152,7 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type)
 }
 
 
-static INLINE void sanitize_hash(struct cso_cache *sc,
+static inline void sanitize_hash(struct cso_cache *sc,
                                  struct cso_hash *hash,
                                  enum cso_cache_type type,
                                  int max_size)
@@ -162,7 +162,7 @@ static INLINE void sanitize_hash(struct cso_cache *sc,
 }
 
 
-static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type,
+static inline void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type,
                                int max_size, void *user_data)
 {
    /* if we're approach the maximum size, remove fourth of the entries
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 744b00cbd92..00686d2af41 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -56,22 +56,8 @@
  */
 struct sampler_info
 {
-   struct {
-      void *samplers[PIPE_MAX_SAMPLERS];
-      unsigned nr_samplers;
-   } hw;
-
    void *samplers[PIPE_MAX_SAMPLERS];
    unsigned nr_samplers;
-
-   void *samplers_saved[PIPE_MAX_SAMPLERS];
-   unsigned nr_samplers_saved;
-
-   struct pipe_sampler_view *views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   unsigned nr_views;
-
-   struct pipe_sampler_view *views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   unsigned nr_views_saved;
 };
 
 
@@ -85,6 +71,15 @@ struct cso_context {
    boolean has_tessellation;
    boolean has_streamout;
 
+   struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   unsigned nr_fragment_views;
+
+   struct pipe_sampler_view *fragment_views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   unsigned nr_fragment_views_saved;
+
+   void *fragment_samplers_saved[PIPE_MAX_SAMPLERS];
+   unsigned nr_fragment_samplers_saved;
+
    struct sampler_info samplers[PIPE_SHADER_TYPES];
 
    struct pipe_vertex_buffer aux_vertex_buffer_current;
@@ -116,9 +111,6 @@ struct cso_context {
    uint render_condition_mode, render_condition_mode_saved;
    boolean render_condition_cond, render_condition_cond_saved;
 
-   struct pipe_clip_state clip;
-   struct pipe_clip_state clip_saved;
-
    struct pipe_framebuffer_state fb, fb_saved;
    struct pipe_viewport_state vp, vp_saved;
    struct pipe_blend_color blend_color;
@@ -192,7 +184,7 @@ static boolean delete_vertex_elements(struct cso_context *ctx,
 }
 
 
-static INLINE boolean delete_cso(struct cso_context *ctx,
+static inline boolean delete_cso(struct cso_context *ctx,
                                  void *state, enum cso_cache_type type)
 {
    switch (type) {
@@ -213,7 +205,7 @@ static INLINE boolean delete_cso(struct cso_context *ctx,
    return FALSE;
 }
 
-static INLINE void
+static inline void
 sanitize_hash(struct cso_hash *hash, enum cso_cache_type type,
               int max_size, void *user_data)
 {
@@ -297,7 +289,7 @@ out:
  */
 void cso_destroy_context( struct cso_context *ctx )
 {
-   unsigned i, shader;
+   unsigned i;
 
    if (ctx->pipe) {
       ctx->pipe->set_index_buffer(ctx->pipe, NULL);
@@ -347,13 +339,9 @@ void cso_destroy_context( struct cso_context *ctx )
          ctx->pipe->set_stream_output_targets(ctx->pipe, 0, NULL, NULL);
    }
 
-   /* free sampler views for each shader stage */
-   for (shader = 0; shader < Elements(ctx->samplers); shader++) {
-      struct sampler_info *info = &ctx->samplers[shader];
-      for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
-         pipe_sampler_view_reference(&info->views[i], NULL);
-         pipe_sampler_view_reference(&info->views_saved[i], NULL);
-      }
+   for (i = 0; i < PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+      pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
+      pipe_sampler_view_reference(&ctx->fragment_views_saved[i], NULL);
    }
 
    util_unreference_framebuffer_state(&ctx->fb);
@@ -919,47 +907,6 @@ void cso_restore_tesseval_shader(struct cso_context *ctx)
    ctx->tesseval_shader_saved = NULL;
 }
 
-/* clip state */
-
-static INLINE void
-clip_state_cpy(struct pipe_clip_state *dst,
-               const struct pipe_clip_state *src)
-{
-   memcpy(dst->ucp, src->ucp, sizeof(dst->ucp));
-}
-
-static INLINE int
-clip_state_cmp(const struct pipe_clip_state *a,
-               const struct pipe_clip_state *b)
-{
-   return memcmp(a->ucp, b->ucp, sizeof(a->ucp));
-}
-
-void
-cso_set_clip(struct cso_context *ctx,
-             const struct pipe_clip_state *clip)
-{
-   if (clip_state_cmp(&ctx->clip, clip)) {
-      clip_state_cpy(&ctx->clip, clip);
-      ctx->pipe->set_clip_state(ctx->pipe, clip);
-   }
-}
-
-void
-cso_save_clip(struct cso_context *ctx)
-{
-   clip_state_cpy(&ctx->clip_saved, &ctx->clip);
-}
-
-void
-cso_restore_clip(struct cso_context *ctx)
-{
-   if (clip_state_cmp(&ctx->clip, &ctx->clip_saved)) {
-      clip_state_cpy(&ctx->clip, &ctx->clip_saved);
-      ctx->pipe->set_clip_state(ctx->pipe, &ctx->clip_saved);
-   }
-}
-
 enum pipe_error
 cso_set_vertex_elements(struct cso_context *ctx,
                         unsigned count,
@@ -1122,11 +1069,9 @@ unsigned cso_get_aux_vertex_buffer_slot(struct cso_context *ctx)
 
 /**************** fragment/vertex sampler view state *************************/
 
-static enum pipe_error
-single_sampler(struct cso_context *ctx,
-               struct sampler_info *info,
-               unsigned idx,
-               const struct pipe_sampler_state *templ)
+enum pipe_error
+cso_single_sampler(struct cso_context *ctx, unsigned shader_stage,
+                   unsigned idx, const struct pipe_sampler_state *templ)
 {
    void *handle = NULL;
 
@@ -1162,24 +1107,13 @@ single_sampler(struct cso_context *ctx,
       }
    }
 
-   info->samplers[idx] = handle;
-
+   ctx->samplers[shader_stage].samplers[idx] = handle;
    return PIPE_OK;
 }
 
-enum pipe_error
-cso_single_sampler(struct cso_context *ctx,
-                   unsigned shader_stage,
-                   unsigned idx,
-                   const struct pipe_sampler_state *templ)
-{
-   return single_sampler(ctx, &ctx->samplers[shader_stage], idx, templ);
-}
-
 
-
-static void
-single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
+void
+cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
 {
    struct sampler_info *info = &ctx->samplers[shader_stage];
    unsigned i;
@@ -1191,33 +1125,8 @@ single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
    }
 
    info->nr_samplers = i;
-
-   if (info->hw.nr_samplers != info->nr_samplers ||
-       memcmp(info->hw.samplers,
-              info->samplers,
-              info->nr_samplers * sizeof(void *)) != 0)
-   {
-      memcpy(info->hw.samplers,
-             info->samplers,
-             info->nr_samplers * sizeof(void *));
-
-      /* set remaining slots/pointers to null */
-      for (i = info->nr_samplers; i < info->hw.nr_samplers; i++)
-         info->samplers[i] = NULL;
-
-      ctx->pipe->bind_sampler_states(ctx->pipe, shader_stage, 0,
-                                     MAX2(info->nr_samplers,
-                                          info->hw.nr_samplers),
-                                     info->samplers);
-
-      info->hw.nr_samplers = info->nr_samplers;
-   }
-}
-
-void
-cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
-{
-   single_sampler_done(ctx, shader_stage);
+   ctx->pipe->bind_sampler_states(ctx->pipe, shader_stage, 0, i,
+                                  info->samplers);
 }
 
 
@@ -1240,38 +1149,42 @@ cso_set_samplers(struct cso_context *ctx,
     */
 
    for (i = 0; i < nr; i++) {
-      temp = single_sampler(ctx, info, i, templates[i]);
+      temp = cso_single_sampler(ctx, shader_stage, i, templates[i]);
       if (temp != PIPE_OK)
          error = temp;
    }
 
    for ( ; i < info->nr_samplers; i++) {
-      temp = single_sampler(ctx, info, i, NULL);
+      temp = cso_single_sampler(ctx, shader_stage, i, NULL);
       if (temp != PIPE_OK)
          error = temp;
    }
 
-   single_sampler_done(ctx, shader_stage);
+   cso_single_sampler_done(ctx, shader_stage);
 
    return error;
 }
 
 void
-cso_save_samplers(struct cso_context *ctx, unsigned shader_stage)
+cso_save_fragment_samplers(struct cso_context *ctx)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
-   info->nr_samplers_saved = info->nr_samplers;
-   memcpy(info->samplers_saved, info->samplers, sizeof(info->samplers));
+   struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
+
+   ctx->nr_fragment_samplers_saved = info->nr_samplers;
+   memcpy(ctx->fragment_samplers_saved, info->samplers,
+          sizeof(info->samplers));
 }
 
 
 void
-cso_restore_samplers(struct cso_context *ctx, unsigned shader_stage)
+cso_restore_fragment_samplers(struct cso_context *ctx)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
-   info->nr_samplers = info->nr_samplers_saved;
-   memcpy(info->samplers, info->samplers_saved, sizeof(info->samplers));
-   single_sampler_done(ctx, shader_stage);
+   struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
+
+   info->nr_samplers = ctx->nr_fragment_samplers_saved;
+   memcpy(info->samplers, ctx->fragment_samplers_saved,
+          sizeof(info->samplers));
+   cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
 }
 
 
@@ -1281,71 +1194,74 @@ cso_set_sampler_views(struct cso_context *ctx,
                       unsigned count,
                       struct pipe_sampler_view **views)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
-   unsigned i;
-   boolean any_change = FALSE;
+   if (shader_stage == PIPE_SHADER_FRAGMENT) {
+      unsigned i;
+      boolean any_change = FALSE;
 
-   /* reference new views */
-   for (i = 0; i < count; i++) {
-      any_change |= info->views[i] != views[i];
-      pipe_sampler_view_reference(&info->views[i], views[i]);
-   }
-   /* unref extra old views, if any */
-   for (; i < info->nr_views; i++) {
-      any_change |= info->views[i] != NULL;
-      pipe_sampler_view_reference(&info->views[i], NULL);
-   }
+      /* reference new views */
+      for (i = 0; i < count; i++) {
+         any_change |= ctx->fragment_views[i] != views[i];
+         pipe_sampler_view_reference(&ctx->fragment_views[i], views[i]);
+      }
+      /* unref extra old views, if any */
+      for (; i < ctx->nr_fragment_views; i++) {
+         any_change |= ctx->fragment_views[i] != NULL;
+         pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
+      }
 
-   /* bind the new sampler views */
-   if (any_change) {
-      ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0,
-                                   MAX2(info->nr_views, count),
-                                   info->views);
-   }
+      /* bind the new sampler views */
+      if (any_change) {
+         ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0,
+                                      MAX2(ctx->nr_fragment_views, count),
+                                      ctx->fragment_views);
+      }
 
-   info->nr_views = count;
+      ctx->nr_fragment_views = count;
+   }
+   else
+      ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0, count, views);
 }
 
 
 void
-cso_save_sampler_views(struct cso_context *ctx, unsigned shader_stage)
+cso_save_fragment_sampler_views(struct cso_context *ctx)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
    unsigned i;
 
-   info->nr_views_saved = info->nr_views;
+   ctx->nr_fragment_views_saved = ctx->nr_fragment_views;
 
-   for (i = 0; i < info->nr_views; i++) {
-      assert(!info->views_saved[i]);
-      pipe_sampler_view_reference(&info->views_saved[i], info->views[i]);
+   for (i = 0; i < ctx->nr_fragment_views; i++) {
+      assert(!ctx->fragment_views_saved[i]);
+      pipe_sampler_view_reference(&ctx->fragment_views_saved[i],
+                                  ctx->fragment_views[i]);
    }
 }
 
 
 void
-cso_restore_sampler_views(struct cso_context *ctx, unsigned shader_stage)
+cso_restore_fragment_sampler_views(struct cso_context *ctx)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
-   unsigned i, nr_saved = info->nr_views_saved;
+   unsigned i, nr_saved = ctx->nr_fragment_views_saved;
    unsigned num;
 
    for (i = 0; i < nr_saved; i++) {
-      pipe_sampler_view_reference(&info->views[i], NULL);
+      pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
       /* move the reference from one pointer to another */
-      info->views[i] = info->views_saved[i];
-      info->views_saved[i] = NULL;
+      ctx->fragment_views[i] = ctx->fragment_views_saved[i];
+      ctx->fragment_views_saved[i] = NULL;
    }
-   for (; i < info->nr_views; i++) {
-      pipe_sampler_view_reference(&info->views[i], NULL);
+   for (; i < ctx->nr_fragment_views; i++) {
+      pipe_sampler_view_reference(&ctx->fragment_views[i], NULL);
    }
 
-   num = MAX2(info->nr_views, nr_saved);
+   num = MAX2(ctx->nr_fragment_views, nr_saved);
 
    /* bind the old/saved sampler views */
-   ctx->pipe->set_sampler_views(ctx->pipe, shader_stage, 0, num, info->views);
+   ctx->pipe->set_sampler_views(ctx->pipe, PIPE_SHADER_FRAGMENT, 0, num,
+                                ctx->fragment_views);
 
-   info->nr_views = nr_saved;
-   info->nr_views_saved = 0;
+   ctx->nr_fragment_views = nr_saved;
+   ctx->nr_fragment_views_saved = 0;
 }
 
 
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index cc50b60c6cd..f0a27390d17 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -72,19 +72,17 @@ cso_set_samplers(struct cso_context *cso,
                  const struct pipe_sampler_state **states);
 
 void
-cso_save_samplers(struct cso_context *cso, unsigned shader_stage);
+cso_save_fragment_samplers(struct cso_context *cso);
 
 void
-cso_restore_samplers(struct cso_context *cso, unsigned shader_stage);
+cso_restore_fragment_samplers(struct cso_context *cso);
 
 /* Alternate interface to support state trackers that like to modify
  * samplers one at a time:
  */
 enum pipe_error
-cso_single_sampler(struct cso_context *cso,
-                   unsigned shader_stage,
-                   unsigned count,
-                   const struct pipe_sampler_state *states);
+cso_single_sampler(struct cso_context *cso, unsigned shader_stage,
+                   unsigned idx, const struct pipe_sampler_state *states);
 
 void
 cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage);
@@ -188,19 +186,6 @@ void cso_save_render_condition(struct cso_context *cso);
 void cso_restore_render_condition(struct cso_context *cso);
 
 
-/* clip state */
-
-void
-cso_set_clip(struct cso_context *cso,
-             const struct pipe_clip_state *clip);
-
-void
-cso_save_clip(struct cso_context *cso);
-
-void
-cso_restore_clip(struct cso_context *cso);
-
-
 /* sampler view state */
 
 void
@@ -210,10 +195,10 @@ cso_set_sampler_views(struct cso_context *cso,
                       struct pipe_sampler_view **views);
 
 void
-cso_save_sampler_views(struct cso_context *cso, unsigned shader_stage);
+cso_save_fragment_sampler_views(struct cso_context *ctx);
 
 void
-cso_restore_sampler_views(struct cso_context *cso, unsigned shader_stage);
+cso_restore_fragment_sampler_views(struct cso_context *ctx);
 
 
 /* constant buffers */
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index a1564f93292..c827a68ea0a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -45,7 +45,7 @@
 /* fixme: move it from here */
 #define MAX_PRIMITIVES 64
 
-static INLINE int
+static inline int
 draw_gs_get_input_index(int semantic, int index,
                         const struct tgsi_shader_info *input_info)
 {
@@ -66,7 +66,7 @@ draw_gs_get_input_index(int semantic, int index,
  * the number of elements in the SOA vector. This ensures that the
  * throughput is optimized for the given vector instruction set.
  */
-static INLINE boolean
+static inline boolean
 draw_gs_should_flush(struct draw_geometry_shader *shader)
 {
    return (shader->fetched_prim_count == shader->vector_length);
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 90a31bc6ac0..b1e1bcbee04 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -72,7 +72,7 @@ struct draw_gs_llvm_iface {
    LLVMValueRef input;
 };
 
-static INLINE const struct draw_gs_llvm_iface *
+static inline const struct draw_gs_llvm_iface *
 draw_gs_llvm_iface(const struct lp_build_tgsi_gs_iface *iface)
 {
    return (const struct draw_gs_llvm_iface *)iface;
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index d48ed721593..d153c166ead 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -350,7 +350,7 @@ struct draw_gs_llvm_variant_key
     PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state))
 
 
-static INLINE size_t
+static inline size_t
 draw_llvm_variant_key_size(unsigned nr_vertex_elements,
                            unsigned nr_samplers)
 {
@@ -360,7 +360,7 @@ draw_llvm_variant_key_size(unsigned nr_vertex_elements,
 }
 
 
-static INLINE size_t
+static inline size_t
 draw_gs_llvm_variant_key_size(unsigned nr_samplers)
 {
    return (sizeof(struct draw_gs_llvm_variant_key) +
@@ -368,7 +368,7 @@ draw_gs_llvm_variant_key_size(unsigned nr_samplers)
 }
 
 
-static INLINE struct draw_sampler_static_state *
+static inline struct draw_sampler_static_state *
 draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
 {
    return (struct draw_sampler_static_state *)
@@ -476,13 +476,13 @@ struct draw_llvm {
 };
 
 
-static INLINE struct llvm_vertex_shader *
+static inline struct llvm_vertex_shader *
 llvm_vertex_shader(struct draw_vertex_shader *vs)
 {
    return (struct llvm_vertex_shader *)vs;
 }
 
-static INLINE struct llvm_geometry_shader *
+static inline struct llvm_geometry_shader *
 llvm_geometry_shader(struct draw_geometry_shader *gs)
 {
    return (struct llvm_geometry_shader *)gs;
diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h
index 35273330d13..e69dcbded0e 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.h
+++ b/src/gallium/auxiliary/draw/draw_pipe.h
@@ -115,7 +115,7 @@ void draw_unfilled_prepare_outputs(struct draw_context *context,
  * \param idx  index into stage's tmp[] array to put the copy (dest)
  * \return  pointer to the copied vertex
  */
-static INLINE struct vertex_header *
+static inline struct vertex_header *
 dup_vert( struct draw_stage *stage,
 	  const struct vertex_header *vert,
 	  unsigned idx )
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 936046ea5f5..85d24b7a6a1 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -511,7 +511,7 @@ bind_aaline_fragment_shader(struct aaline_stage *aaline)
 
 
 
-static INLINE struct aaline_stage *
+static inline struct aaline_stage *
 aaline_stage( struct draw_stage *stage )
 {
    return (struct aaline_stage *) stage;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 7feb49ae934..3918923296d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -427,7 +427,7 @@ bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
 
 
 
-static INLINE struct aapoint_stage *
+static inline struct aapoint_stage *
 aapoint_stage( struct draw_stage *stage )
 {
    return (struct aapoint_stage *) stage;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index e1e7dcc6f63..c22758bc702 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -70,12 +70,12 @@ struct clip_stage {
 
 
 /** Cast wrapper */
-static INLINE struct clip_stage *clip_stage( struct draw_stage *stage )
+static inline struct clip_stage *clip_stage( struct draw_stage *stage )
 {
    return (struct clip_stage *)stage;
 }
 
-static INLINE unsigned
+static inline unsigned
 draw_viewport_index(struct draw_context *draw,
                     const struct vertex_header *leading_vertex)
 {
@@ -210,7 +210,7 @@ static void interp( const struct clip_stage *clip,
  * true, otherwise returns false.
  * Triangle is considered null/empty if it's area is qual to zero.
  */
-static INLINE boolean
+static inline boolean
 is_tri_null(struct draw_context *draw, const struct prim_header *header)
 {
    const unsigned pos_attr = draw_current_shader_position_output(draw);
@@ -322,7 +322,7 @@ static void emit_poly( struct draw_stage *stage,
 }
 
 
-static INLINE float
+static inline float
 dot4(const float *a, const float *b)
 {
    return (a[0] * b[0] +
@@ -336,7 +336,7 @@ dot4(const float *a, const float *b)
  * it first checks if the shader provided a clip distance, otherwise
  * it works out the value using the clipvertex
  */
-static INLINE float getclipdist(const struct clip_stage *clipper,
+static inline float getclipdist(const struct clip_stage *clipper,
                                 struct vertex_header *vert,
                                 int plane_idx)
 {
diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index fa344089a8a..fc8293bd128 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -46,12 +46,12 @@ struct cull_stage {
 };
 
 
-static INLINE struct cull_stage *cull_stage( struct draw_stage *stage )
+static inline struct cull_stage *cull_stage( struct draw_stage *stage )
 {
    return (struct cull_stage *)stage;
 }
 
-static INLINE boolean
+static inline boolean
 cull_distance_is_out(float dist)
 {
    return (dist < 0.0f) || util_is_inf_or_nan(dist);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
index 59e33b472f4..0ea740861d6 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c
@@ -47,7 +47,7 @@ struct flat_stage
 };
 
 
-static INLINE struct flat_stage *
+static inline struct flat_stage *
 flat_stage(struct draw_stage *stage)
 {
    return (struct flat_stage *) stage;
@@ -55,7 +55,7 @@ flat_stage(struct draw_stage *stage)
 
 
 /** Copy all the constant attributes from 'src' vertex to 'dst' vertex */
-static INLINE void copy_flats( struct draw_stage *stage,
+static inline void copy_flats( struct draw_stage *stage,
                                struct vertex_header *dst,
                                const struct vertex_header *src )
 {
@@ -70,7 +70,7 @@ static INLINE void copy_flats( struct draw_stage *stage,
 
 
 /** Copy all the color attributes from src vertex to dst0 & dst1 vertices */
-static INLINE void copy_flats2( struct draw_stage *stage,
+static inline void copy_flats2( struct draw_stage *stage,
                                 struct vertex_header *dst0,
                                 struct vertex_header *dst1,
                                 const struct vertex_header *src )
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index b25dd21fd4d..5e0d8ce793d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -49,7 +49,7 @@ struct offset_stage {
 
 
 
-static INLINE struct offset_stage *offset_stage( struct draw_stage *stage )
+static inline struct offset_stage *offset_stage( struct draw_stage *stage )
 {
    return (struct offset_stage *) stage;
 }
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 445f195e59c..186b4cb4935 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -462,7 +462,7 @@ bind_pstip_fragment_shader(struct pstip_stage *pstip)
 }
 
 
-static INLINE struct pstip_stage *
+static inline struct pstip_stage *
 pstip_stage( struct draw_stage *stage )
 {
    return (struct pstip_stage *) stage;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 476c011b9a0..381aa41530b 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -53,7 +53,7 @@ struct stipple_stage {
 };
 
 
-static INLINE struct stipple_stage *
+static inline struct stipple_stage *
 stipple_stage(struct draw_stage *stage)
 {
    return (struct stipple_stage *) stage;
@@ -108,7 +108,7 @@ emit_segment(struct draw_stage *stage, struct prim_header *header,
 }
 
 
-static INLINE unsigned
+static inline unsigned
 stipple_test(int counter, ushort pattern, int factor)
 {
    int b = (counter / factor) & 0xf;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
index 8148f6b4569..7f958d9b985 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c
@@ -43,7 +43,7 @@ struct twoside_stage {
 };
 
 
-static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage )
+static inline struct twoside_stage *twoside_stage( struct draw_stage *stage )
 {
    return (struct twoside_stage *)stage;
 }
@@ -51,7 +51,7 @@ static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage )
 /**
  * Copy back color(s) to front color(s).
  */
-static INLINE struct vertex_header *
+static inline struct vertex_header *
 copy_bfc( struct twoside_stage *twoside, 
           const struct vertex_header *v,
           unsigned idx )
diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
index 51fbdb97ae8..8e6435cdbb4 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c
@@ -53,7 +53,7 @@ struct unfilled_stage {
 };
 
 
-static INLINE struct unfilled_stage *unfilled_stage( struct draw_stage *stage )
+static inline struct unfilled_stage *unfilled_stage( struct draw_stage *stage )
 {
    return (struct unfilled_stage *)stage;
 }
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index e0e32dd9bbe..5cc866d7eee 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -85,7 +85,7 @@ struct vbuf_stage {
 /**
  * Basically a cast wrapper.
  */
-static INLINE struct vbuf_stage *
+static inline struct vbuf_stage *
 vbuf_stage( struct draw_stage *stage )
 {
    assert(stage);
@@ -97,7 +97,7 @@ static void vbuf_flush_vertices( struct vbuf_stage *vbuf );
 static void vbuf_alloc_vertices( struct vbuf_stage *vbuf );
 
 
-static INLINE boolean 
+static inline boolean 
 overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
 {
    unsigned long used = (unsigned long) ((char *)ptr - (char *)map);
@@ -105,7 +105,7 @@ overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz )
 }
 
 
-static INLINE void 
+static inline void 
 check_space( struct vbuf_stage *vbuf, unsigned nr )
 {
    if (vbuf->nr_vertices + nr > vbuf->max_vertices ||
@@ -126,7 +126,7 @@ check_space( struct vbuf_stage *vbuf, unsigned nr )
  * have a couple of slots at the beginning (1-dword header, 4-dword
  * clip pos) that we ignore here.  We only use the vertex->data[] fields.
  */
-static INLINE ushort 
+static inline ushort 
 emit_vertex( struct vbuf_stage *vbuf,
              struct vertex_header *vertex )
 {
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index 6c57d5c1e3e..38ac11a9adf 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -45,7 +45,7 @@ struct wideline_stage {
 
 
 
-static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage )
+static inline struct wideline_stage *wideline_stage( struct draw_stage *stage )
 {
    return (struct wideline_stage *)stage;
 }
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index 05beba8cd97..348b0e93bbc 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -83,7 +83,7 @@ struct widepoint_stage {
 
 
 
-static INLINE struct widepoint_stage *
+static inline struct widepoint_stage *
 widepoint_stage( struct draw_stage *stage )
 {
    return (struct widepoint_stage *)stage;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 7b893cb2692..0ad94bb031f 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -494,7 +494,7 @@ void draw_update_viewport_flags(struct draw_context *draw);
  * Return index of the given viewport clamping it
  * to be between 0 <= and < PIPE_MAX_VIEWPORTS
  */
-static INLINE unsigned
+static inline unsigned
 draw_clamp_viewport_idx(int idx)
 {
    return ((PIPE_MAX_VIEWPORTS > idx && idx >= 0) ? idx : 0);
@@ -505,7 +505,7 @@ draw_clamp_viewport_idx(int idx)
  * overflows then it returns the value from
  * the overflow_value variable.
  */
-static INLINE unsigned
+static inline unsigned
 draw_overflow_uadd(unsigned a, unsigned b,
                    unsigned overflow_value)
 {
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 5af845ff938..ffec863ae6f 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -54,7 +54,7 @@ struct fetch_pipeline_middle_end {
 
 
 /** cast wrapper */
-static INLINE struct fetch_pipeline_middle_end *
+static inline struct fetch_pipeline_middle_end *
 fetch_pipeline_middle_end(struct draw_pt_middle_end *middle)
 {
    return (struct fetch_pipeline_middle_end *) middle;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index d17d6959b44..e42c4af0e70 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -60,7 +60,7 @@ struct llvm_middle_end {
 
 
 /** cast wrapper */
-static INLINE struct llvm_middle_end *
+static inline struct llvm_middle_end *
 llvm_middle_end(struct draw_pt_middle_end *middle)
 {
    return (struct llvm_middle_end *) middle;
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 71a7d3918e9..f0d5e0f5656 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -53,7 +53,7 @@ struct pt_post_vs {
                    const struct draw_prim_info *prim_info );
 };
 
-static INLINE void
+static inline void
 initialize_vertex_header(struct vertex_header *header)
 {
    header->clipmask = 0;
@@ -62,7 +62,7 @@ initialize_vertex_header(struct vertex_header *header)
    header->vertex_id = UNDEFINED_VERTEX_ID;
 }
 
-static INLINE float
+static inline float
 dot4(const float *a, const float *b)
 {
    return (a[0]*b[0] +
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index 91e67c0840d..20de26fd08a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -65,7 +65,7 @@ draw_so_info(const struct draw_context *draw)
    return state;
 }
 
-static INLINE boolean
+static inline boolean
 draw_has_so(const struct draw_context *draw)
 {
    const struct pipe_stream_output_info *state = draw_so_info(draw);
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit.c b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
index 8098adea61f..8d448f92a26 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit.c
@@ -84,7 +84,7 @@ vsplit_flush_cache(struct vsplit_frontend *vsplit, unsigned flags)
 /**
  * Add a fetch element and add it to the draw elements.
  */
-static INLINE void
+static inline void
 vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch, unsigned ofbias)
 {
    unsigned hash;
@@ -111,7 +111,7 @@ vsplit_add_cache(struct vsplit_frontend *vsplit, unsigned fetch, unsigned ofbias
  * The value is checked for overflows (both integer overflows
  * and the elements array overflow).
  */
-static INLINE unsigned
+static inline unsigned
 vsplit_get_base_idx(struct vsplit_frontend *vsplit,
                     unsigned start, unsigned fetch, unsigned *ofbit)
 {
@@ -137,7 +137,7 @@ vsplit_get_base_idx(struct vsplit_frontend *vsplit,
  * index, plus the element bias, clamped to maximum elememt
  * index if that addition overflows.
  */
-static INLINE unsigned
+static inline unsigned
 vsplit_get_bias_idx(struct vsplit_frontend *vsplit,
                     int idx, int bias, unsigned *ofbias)
 {
@@ -170,7 +170,7 @@ vsplit_get_bias_idx(struct vsplit_frontend *vsplit,
    elt_idx = vsplit_get_base_idx(vsplit, start, fetch, &ofbit);          \
    elt_idx = vsplit_get_bias_idx(vsplit, ofbit ? 0 : DRAW_GET_IDX(elts, elt_idx), elt_bias, &ofbias)
 
-static INLINE void
+static inline void
 vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts,
                        unsigned start, unsigned fetch, int elt_bias)
 {
@@ -179,7 +179,7 @@ vsplit_add_cache_ubyte(struct vsplit_frontend *vsplit, const ubyte *elts,
    vsplit_add_cache(vsplit, elt_idx, ofbias);
 }
 
-static INLINE void
+static inline void
 vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, const ushort *elts,
                        unsigned start, unsigned fetch, int elt_bias)
 {
@@ -193,7 +193,7 @@ vsplit_add_cache_ushort(struct vsplit_frontend *vsplit, const ushort *elts,
  * Add a fetch element and add it to the draw elements.  The fetch element is
  * in full range (uint).
  */
-static INLINE void
+static inline void
 vsplit_add_cache_uint(struct vsplit_frontend *vsplit, const uint *elts,
                       unsigned start, unsigned fetch, int elt_bias)
 {
diff --git a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
index 0f7a3cdc012..0afabb01398 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -129,7 +129,7 @@ CONCAT(vsplit_primitive_, ELT_TYPE)(struct vsplit_frontend *vsplit,
  * When spoken is TRUE, ispoken replaces istart;  When close is TRUE, iclose is
  * appended.
  */
-static INLINE void
+static inline void
 CONCAT(vsplit_segment_cache_, ELT_TYPE)(struct vsplit_frontend *vsplit,
                                         unsigned flags,
                                         unsigned istart, unsigned icount,
diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h
index b4178d6a6c5..ee11d2f9276 100644
--- a/src/gallium/auxiliary/draw/draw_vertex.h
+++ b/src/gallium/auxiliary/draw/draw_vertex.h
@@ -91,13 +91,13 @@ struct vertex_info
    } attrib[PIPE_MAX_SHADER_OUTPUTS];
 };
 
-static INLINE size_t
+static inline size_t
 draw_vinfo_size( const struct vertex_info *a )
 {
    return offsetof(const struct vertex_info, attrib[a->num_attribs]);
 }
 
-static INLINE int
+static inline int
 draw_vinfo_compare( const struct vertex_info *a,
                     const struct vertex_info *b )
 {
@@ -105,7 +105,7 @@ draw_vinfo_compare( const struct vertex_info *a,
    return memcmp( a, b, sizea );
 }
 
-static INLINE void
+static inline void
 draw_vinfo_copy( struct vertex_info *dst,
                  const struct vertex_info *src )
 {
@@ -121,7 +121,7 @@ draw_vinfo_copy( struct vertex_info *dst,
  *                   corresponds to this attribute.
  * \return slot in which the attribute was added
  */
-static INLINE uint
+static inline uint
 draw_emit_vertex_attr(struct vertex_info *vinfo,
                       enum attrib_emit emit, 
                       enum interp_mode interp, /* only used by softpipe??? */
@@ -150,7 +150,7 @@ void draw_dump_emitted_vertex(const struct vertex_info *vinfo,
                               const uint8_t *data);
 
 
-static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit)
+static inline enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit)
 {
    switch (emit) {
    case EMIT_OMIT:
@@ -174,7 +174,7 @@ static INLINE enum pipe_format draw_translate_vinfo_format(enum attrib_emit emit
    }
 }
 
-static INLINE unsigned draw_translate_vinfo_size(enum attrib_emit emit)
+static inline unsigned draw_translate_vinfo_size(enum attrib_emit emit)
 {
    switch (emit) {
    case EMIT_OMIT:
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 1d54e7ef298..24b29e70dd9 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -191,12 +191,12 @@ draw_vs_create_variant_generic( struct draw_vertex_shader *vs,
 
 
 
-static INLINE int draw_vs_variant_keysize( const struct draw_vs_variant_key *key )
+static inline int draw_vs_variant_keysize( const struct draw_vs_variant_key *key )
 {
    return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_variant_element);
 }
 
-static INLINE int draw_vs_variant_key_compare( const struct draw_vs_variant_key *a,
+static inline int draw_vs_variant_key_compare( const struct draw_vs_variant_key *a,
                                          const struct draw_vs_variant_key *b )
 {
    int keysize = draw_vs_variant_keysize(a);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 9daa93eec3e..50ae192325b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -1135,7 +1135,7 @@ lp_build_div(struct lp_build_context *bld,
  *
  * @sa http://www.stereopsis.com/doubleblend.html
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_lerp_simple(struct lp_build_context *bld,
                      LLVMValueRef x,
                      LLVMValueRef v0,
@@ -1674,7 +1674,7 @@ enum lp_build_round_mode
  * NOTE: In the SSE4.1's nearest mode, if two values are equally close, the
  * result is the even value.  That is, rounding 2.5 will be 2.0, and not 3.0.
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_round_sse41(struct lp_build_context *bld,
                      LLVMValueRef a,
                      enum lp_build_round_mode mode)
@@ -1717,7 +1717,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
       args[2] = LLVMConstInt(i32t, mode, 0);
 
       res = lp_build_intrinsic(builder, intrinsic,
-                               vec_type, args, Elements(args));
+                               vec_type, args, Elements(args), 0);
 
       res = LLVMBuildExtractElement(builder, res, index0, "");
    }
@@ -1761,7 +1761,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
 }
 
 
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_iround_nearest_sse2(struct lp_build_context *bld,
                              LLVMValueRef a)
 {
@@ -1817,7 +1817,7 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld,
 
 /*
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_round_altivec(struct lp_build_context *bld,
                        LLVMValueRef a,
                        enum lp_build_round_mode mode)
@@ -1851,7 +1851,7 @@ lp_build_round_altivec(struct lp_build_context *bld,
    return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
 }
 
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_round_arch(struct lp_build_context *bld,
                     LLVMValueRef a,
                     enum lp_build_round_mode mode)
@@ -1997,6 +1997,12 @@ lp_build_floor(struct lp_build_context *bld,
       LLVMTypeRef int_vec_type = bld->int_vec_type;
       LLVMTypeRef vec_type = bld->vec_type;
 
+      if (type.width != 32) {
+         char intrinsic[32];
+         util_snprintf(intrinsic, sizeof intrinsic, "llvm.floor.v%uf%u", type.length, type.width);
+         return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+      }
+
       assert(type.width == 32); /* might want to handle doubles at some point */
 
       inttype = type;
@@ -2066,6 +2072,12 @@ lp_build_ceil(struct lp_build_context *bld,
       LLVMTypeRef int_vec_type = bld->int_vec_type;
       LLVMTypeRef vec_type = bld->vec_type;
 
+      if (type.width != 32) {
+         char intrinsic[32];
+         util_snprintf(intrinsic, sizeof intrinsic, "llvm.ceil.v%uf%u", type.length, type.width);
+         return lp_build_intrinsic_unary(builder, intrinsic, vec_type, a);
+      }
+
       assert(type.width == 32); /* might want to handle doubles at some point */
 
       inttype = type;
@@ -2427,7 +2439,7 @@ lp_build_sqrt(struct lp_build_context *bld,
  * - http://en.wikipedia.org/wiki/Division_(digital)#Newton.E2.80.93Raphson_division
  * - http://softwarecommunity.intel.com/articles/eng/1818.htm
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_rcp_refine(struct lp_build_context *bld,
                     LLVMValueRef a,
                     LLVMValueRef rcp_a)
@@ -2512,7 +2524,7 @@ lp_build_rcp(struct lp_build_context *bld,
  *
  * See also Intel 64 and IA-32 Architectures Optimization Manual.
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_rsqrt_refine(struct lp_build_context *bld,
                       LLVMValueRef a,
                       LLVMValueRef rsqrt_a)
@@ -3535,7 +3547,7 @@ lp_build_fpstate_get(struct gallivm_state *gallivm)
       lp_build_intrinsic(builder,
                          "llvm.x86.sse.stmxcsr",
                          LLVMVoidTypeInContext(gallivm->context),
-                         &mxcsr_ptr8, 1);
+                         &mxcsr_ptr8, 1, 0);
       return mxcsr_ptr;
    }
    return 0;
@@ -3582,6 +3594,6 @@ lp_build_fpstate_set(struct gallivm_state *gallivm,
       lp_build_intrinsic(builder,
                          "llvm.x86.sse.ldmxcsr",
                          LLVMVoidTypeInContext(gallivm->context),
-                         &mxcsr_ptr, 1);
+                         &mxcsr_ptr, 1, 0);
    }
 }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_const.h b/src/gallium/auxiliary/gallivm/lp_bld_const.h
index b17c41931f4..a4c3bf0977a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.h
@@ -120,14 +120,14 @@ lp_build_const_mask_aos_swizzled(struct gallivm_state *gallivm,
                                  const unsigned char *swizzle);
 
 
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_const_int32(struct gallivm_state *gallivm, int i)
 {
    return LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), i, 0);
 }
 
 
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_const_float(struct gallivm_state *gallivm, float x)
 {
    return LLVMConstReal(LLVMFloatTypeInContext(gallivm->context), x);
@@ -135,7 +135,7 @@ lp_build_const_float(struct gallivm_state *gallivm, float x)
 
 
 /** Return constant-valued pointer to int */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_const_int_pointer(struct gallivm_state *gallivm, const void *ptr)
 {
    LLVMTypeRef int_type;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index 405e6486f7a..7283e2f162f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -61,6 +61,7 @@ lp_check_alignment(const void *ptr, unsigned alignment)
    return ((uintptr_t)ptr & (alignment - 1)) == 0;
 }
 
+#if (defined(PIPE_OS_WINDOWS) && !defined(PIPE_CC_MSVC)) || defined(PIPE_OS_EMBEDDED)
 
 class raw_debug_ostream :
    public llvm::raw_ostream
@@ -91,6 +92,7 @@ raw_debug_ostream::write_impl(const char *Ptr, size_t Size)
    }
 }
 
+#endif
 
 extern "C" const char *
 lp_get_module_id(LLVMModuleRef module)
@@ -123,7 +125,7 @@ lp_debug_dump_value(LLVMValueRef value)
  * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
  */
 static size_t
-disassemble(const void* func, llvm::raw_ostream & Out)
+disassemble(const void* func)
 {
    const uint8_t *bytes = (const uint8_t *)func;
 
@@ -141,7 +143,8 @@ disassemble(const void* func, llvm::raw_ostream & Out)
    char outline[1024];
 
    if (!D) {
-      Out << "error: couldn't create disassembler for triple " << Triple << "\n";
+      _debug_printf("error: couldn't create disassembler for triple %s\n",
+                    Triple.c_str());
       return 0;
    }
 
@@ -155,13 +158,13 @@ disassemble(const void* func, llvm::raw_ostream & Out)
        * so that between runs.
        */
 
-      Out << llvm::format("%6lu:\t", (unsigned long)pc);
+      _debug_printf("%6lu:\t", (unsigned long)pc);
 
       Size = LLVMDisasmInstruction(D, (uint8_t *)bytes + pc, extent - pc, 0, outline,
                                    sizeof outline);
 
       if (!Size) {
-         Out << "invalid\n";
+         _debug_printf("invalid\n");
          pc += 1;
          break;
       }
@@ -173,10 +176,10 @@ disassemble(const void* func, llvm::raw_ostream & Out)
       if (0) {
          unsigned i;
          for (i = 0; i < Size; ++i) {
-            Out << llvm::format("%02x ", bytes[pc + i]);
+            _debug_printf("%02x ", bytes[pc + i]);
          }
          for (; i < 16; ++i) {
-            Out << "   ";
+            _debug_printf("   ");
          }
       }
 
@@ -184,9 +187,9 @@ disassemble(const void* func, llvm::raw_ostream & Out)
        * Print the instruction.
        */
 
-      Out << outline;
+      _debug_printf("%*s", (int)Size, outline);
 
-      Out << "\n";
+      _debug_printf("\n");
 
       /*
        * Stop disassembling on return statements, if there is no record of a
@@ -206,13 +209,12 @@ disassemble(const void* func, llvm::raw_ostream & Out)
       pc += Size;
 
       if (pc >= extent) {
-         Out << "disassembly larger than " << extent << "bytes, aborting\n";
+         _debug_printf("disassembly larger than %llu bytes, aborting\n", (unsigned long long)extent);
          break;
       }
    }
 
-   Out << "\n";
-   Out.flush();
+   _debug_printf("\n");
 
    LLVMDisasmDispose(D);
 
@@ -229,9 +231,8 @@ disassemble(const void* func, llvm::raw_ostream & Out)
 
 extern "C" void
 lp_disassemble(LLVMValueRef func, const void *code) {
-   raw_debug_ostream Out;
-   Out << LLVMGetValueName(func) << ":\n";
-   disassemble(code, Out);
+   _debug_printf("%s:\n", LLVMGetValueName(func));
+   disassemble(code);
 }
 
 
@@ -273,7 +274,7 @@ lp_profile(LLVMValueRef func, const void *code)
       unsigned long addr = (uintptr_t)code;
       llvm::raw_fd_ostream Out(perf_asm_fd, false);
       Out << symbol << ":\n";
-      unsigned long size = disassemble(code, Out);
+      unsigned long size = disassemble(code);
       fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
       fflush(perf_map_file);
    }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index 321e09d56b9..375ba6cb5ff 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -59,7 +59,7 @@ extern unsigned gallivm_debug;
 #endif
 
 
-static INLINE void
+static inline void
 lp_build_name(LLVMValueRef val, const char *format, ...)
 {
 #ifdef DEBUG
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
index efe71704c3a..ddf3ad1dfc6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -95,7 +95,7 @@ lp_build_format_swizzle_aos(const struct util_format_description *desc,
 /**
  * Whether the format matches the vector type, apart of swizzles.
  */
-static INLINE boolean
+static inline boolean
 format_matches_type(const struct util_format_description *desc,
                     struct lp_type type)
 {
@@ -146,7 +146,7 @@ format_matches_type(const struct util_format_description *desc,
  *
  * @return XYZW in a float[4] or ubyte[4] or ushort[4] vector.
  */
-static INLINE LLVMValueRef
+static inline LLVMValueRef
 lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm,
                                const struct util_format_description *desc,
                                LLVMValueRef packed)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
index 4f5a45c6a3d..fa0e8b656bb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c
@@ -212,7 +212,7 @@ yuyv_to_yuv_soa(struct gallivm_state *gallivm,
 }
 
 
-static INLINE void
+static inline void
 yuv_to_rgb_soa(struct gallivm_state *gallivm,
                unsigned n,
                LLVMValueRef y, LLVMValueRef u, LLVMValueRef v,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 384ea864081..017d0752060 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -106,7 +106,6 @@ enum LLVM_CodeGenOpt_Level {
 static boolean
 create_pass_manager(struct gallivm_state *gallivm)
 {
-   char *td_str;
    assert(!gallivm->passmgr);
    assert(gallivm->target);
 
@@ -122,10 +121,29 @@ create_pass_manager(struct gallivm_state *gallivm)
    // Old versions of LLVM get the DataLayout from the pass manager.
    LLVMAddTargetData(gallivm->target, gallivm->passmgr);
 
-   // New ones from the Module.
-   td_str = LLVMCopyStringRepOfTargetData(gallivm->target);
-   LLVMSetDataLayout(gallivm->module, td_str);
-   free(td_str);
+   /* Setting the module's DataLayout to an empty string will cause the
+    * ExecutionEngine to copy the DataLayout string from its target
+    * machine to the module.  As of LLVM 3.8 the module and the execution
+    * engine are required to have the same DataLayout.
+    *
+    * TODO: This is just a temporary work-around.  The correct solution is
+    * for gallivm_init_state() to create a TargetMachine and pull the
+    * DataLayout from there.  Currently, the TargetMachine used by llvmpipe
+    * is being implicitly created by the EngineBuilder in
+    * lp_build_create_jit_compiler_for_module()
+    */
+
+#if HAVE_LLVM < 0x0308
+   {
+      char *td_str;
+      // New ones from the Module.
+      td_str = LLVMCopyStringRepOfTargetData(gallivm->target);
+      LLVMSetDataLayout(gallivm->module, td_str);
+      free(td_str);
+   }
+#else
+   LLVMSetDataLayout(gallivm->module, "");
+#endif
 
    if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
       /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.c b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
index 2bf1211bcd7..30f4863ec44 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.c
@@ -81,7 +81,8 @@ lp_build_intrinsic(LLVMBuilderRef builder,
                    const char *name,
                    LLVMTypeRef ret_type,
                    LLVMValueRef *args,
-                   unsigned num_args)
+                   unsigned num_args,
+                   LLVMAttribute attr)
 {
    LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
    LLVMValueRef function;
@@ -99,6 +100,9 @@ lp_build_intrinsic(LLVMBuilderRef builder,
       }
 
       function = lp_declare_intrinsic(module, name, ret_type, arg_types, num_args);
+
+      if (attr)
+          LLVMAddFunctionAttr(function, attr);
    }
 
    return LLVMBuildCall(builder, function, args, num_args, "");
@@ -111,7 +115,7 @@ lp_build_intrinsic_unary(LLVMBuilderRef builder,
                          LLVMTypeRef ret_type,
                          LLVMValueRef a)
 {
-   return lp_build_intrinsic(builder, name, ret_type, &a, 1);
+   return lp_build_intrinsic(builder, name, ret_type, &a, 1, 0);
 }
 
 
@@ -127,7 +131,7 @@ lp_build_intrinsic_binary(LLVMBuilderRef builder,
    args[0] = a;
    args[1] = b;
 
-   return lp_build_intrinsic(builder, name, ret_type, args, 2);
+   return lp_build_intrinsic(builder, name, ret_type, args, 2, 0);
 }
 
 
@@ -242,7 +246,7 @@ lp_build_intrinsic_map(struct gallivm_state *gallivm,
       LLVMValueRef res_elem;
       for(j = 0; j < num_args; ++j)
          arg_elems[j] = LLVMBuildExtractElement(builder, args[j], index, "");
-      res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args);
+      res_elem = lp_build_intrinsic(builder, name, ret_elem_type, arg_elems, num_args, 0);
       res = LLVMBuildInsertElement(builder, res, res_elem, index, "");
    }
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_intr.h b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
index 38c5c29c980..a54b367961a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_intr.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_intr.h
@@ -59,7 +59,8 @@ lp_build_intrinsic(LLVMBuilderRef builder,
                    const char *name,
                    LLVMTypeRef ret_type,
                    LLVMValueRef *args,
-                   unsigned num_args);
+                   unsigned num_args,
+                   LLVMAttribute attr);
 
 
 LLVMValueRef
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_limits.h b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
index db503514881..571c615f9f8 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -88,7 +88,7 @@
  * actually try to allocate the maximum and run out of memory and crash.  So
  * stick with something reasonable here.
  */
-static INLINE int
+static inline int
 gallivm_get_shader_param(enum pipe_shader_cap param)
 {
    switch(param) {
@@ -100,7 +100,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
       return LP_MAX_TGSI_NESTING;
    case PIPE_SHADER_CAP_MAX_INPUTS:
-      return PIPE_MAX_SHADER_INPUTS;
+      return 32;
    case PIPE_SHADER_CAP_MAX_OUTPUTS:
       return 32;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
@@ -132,6 +132,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
    case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
       return 1;
    case PIPE_SHADER_CAP_DOUBLES:
+      return 1;
    case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
    case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
index 80b53e5c3f8..19d30d0d63c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c
@@ -395,7 +395,7 @@ lp_build_select(struct lp_build_context *bld,
       args[2] = mask;
 
       res = lp_build_intrinsic(builder, intrinsic,
-                               arg_type, args, Elements(args));
+                               arg_type, args, Elements(args), 0);
 
       if (arg_type != bld->vec_type) {
          res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
index 640b7e0d7e0..eba758da6ae 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -371,7 +371,7 @@ struct lp_build_sample_context
  * We only support a few wrap modes in lp_build_sample_wrap_linear_int() at
  * this time.  Return whether the given mode is supported by that function.
  */
-static INLINE boolean
+static inline boolean
 lp_is_simple_wrap_mode(unsigned mode)
 {
    switch (mode) {
@@ -384,7 +384,7 @@ lp_is_simple_wrap_mode(unsigned mode)
 }
 
 
-static INLINE void
+static inline void
 apply_sampler_swizzle(struct lp_build_sample_context *bld,
                       LLVMValueRef *texel)
 {
@@ -402,7 +402,7 @@ apply_sampler_swizzle(struct lp_build_sample_context *bld,
  * not really dimension as such, this indicates the amount of
  * "normal" texture coords subject to minification, wrapping etc.
  */
-static INLINE unsigned
+static inline unsigned
 texture_dims(enum pipe_texture_target tex)
 {
    switch (tex) {
@@ -424,7 +424,7 @@ texture_dims(enum pipe_texture_target tex)
    }
 }
 
-static INLINE boolean
+static inline boolean
 has_layer_coord(enum pipe_texture_target tex)
 {
    switch (tex) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index e391d8a4301..c4ae30461cb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -104,7 +104,7 @@ lp_build_tgsi_intrinsic(
    struct lp_build_context * base = &bld_base->base;
    emit_data->output[emit_data->chan] = lp_build_intrinsic(
                base->gallivm->builder, action->intr_name,
-               emit_data->dst_type, emit_data->args, emit_data->arg_count);
+               emit_data->dst_type, emit_data->args, emit_data->arg_count, 0);
 }
 
 LLVMValueRef
@@ -175,13 +175,52 @@ void lp_build_fetch_args(
    unsigned src;
    for (src = 0; src < emit_data->info->num_src; src++) {
       emit_data->args[src] = lp_build_emit_fetch(bld_base, emit_data->inst, src,
-                                               emit_data->chan);
+                                                 emit_data->src_chan);
    }
    emit_data->arg_count = emit_data->info->num_src;
    lp_build_action_set_dst_type(emit_data, bld_base,
 		emit_data->inst->Instruction.Opcode);
 }
 
+/**
+ * With doubles, src and dst channels aren't 1:1.  Given a dst channel,
+ * return the src channel to fetch, or -1 if the dst channel is skipped:
+ * 1. if neither type is double then src == dst;
+ * 2. if dst is double
+ *     - y and w are never stored (-1);
+ *     - if src is double too then src == dst;
+ *     - else (e.g. f2d) dst x maps to src x (d.xy = s.x)
+ *     - and dst z maps to src y (d.zw = s.y);
+ * 3. if dst is single and src is double
+ *    - dst x,z map to src x (i.e. the src xy pair);
+ *    - dst y,w map to src z (i.e. the src zw pair).
+ */
+static int get_src_chan_idx(unsigned opcode,
+                            int dst_chan_index)
+{
+   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(opcode);
+   enum tgsi_opcode_type stype = tgsi_opcode_infer_src_type(opcode);
+
+   if (dtype != TGSI_TYPE_DOUBLE && stype != TGSI_TYPE_DOUBLE)
+      return dst_chan_index;
+   if (dtype == TGSI_TYPE_DOUBLE) {
+      if (dst_chan_index == 1 || dst_chan_index == 3)
+         return -1;
+      if (stype == TGSI_TYPE_DOUBLE)
+         return dst_chan_index;
+      if (dst_chan_index == 0)
+         return 0;
+      if (dst_chan_index == 2)
+         return 1;
+   } else {
+      if (dst_chan_index == 0 || dst_chan_index == 2)
+         return 0;
+      if (dst_chan_index == 1 || dst_chan_index == 3)
+         return 2;
+   }
+   return -1;
+}
+
 /* XXX: COMMENT
  * It should be assumed that this function ignores writemasks
  */
@@ -197,7 +236,6 @@ lp_build_tgsi_inst_llvm(
    struct lp_build_emit_data emit_data;
    unsigned chan_index;
    LLVMValueRef val;
-
    bld_base->pc++;
 
    if (bld_base->emit_debug) {
@@ -240,7 +278,12 @@ lp_build_tgsi_inst_llvm(
    /* Emit the instructions */
    if (info->output_mode == TGSI_OUTPUT_COMPONENTWISE && bld_base->soa) {
       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
+         int src_index = get_src_chan_idx(inst->Instruction.Opcode, chan_index);
+         /* ignore channels 1/3 in double dst */
+         if (src_index == -1)
+            continue;
          emit_data.chan = chan_index;
+         emit_data.src_chan = src_index;
          if (!action->fetch_args) {
             lp_build_fetch_args(bld_base, &emit_data);
          } else {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 967373ccdae..2ca9c6194b3 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -338,6 +338,7 @@ struct lp_build_tgsi_context
    struct lp_build_context uint_bld;
    struct lp_build_context int_bld;
 
+   struct lp_build_context dbl_bld;
    /** This array stores functions that are used to transform TGSI opcodes to
      * LLVM instructions.
      */
@@ -349,6 +350,9 @@ struct lp_build_tgsi_context
 
    struct lp_build_tgsi_action sqrt_action;
 
+   struct lp_build_tgsi_action drsq_action;
+
+   struct lp_build_tgsi_action dsqrt_action;
    const struct tgsi_shader_info *info;
 
    lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT];
@@ -558,13 +562,13 @@ struct lp_build_tgsi_aos_context
 
 };
 
-static INLINE struct lp_build_tgsi_soa_context *
+static inline struct lp_build_tgsi_soa_context *
 lp_soa_context(struct lp_build_tgsi_context *bld_base)
 {
    return (struct lp_build_tgsi_soa_context *)bld_base;
 }
 
-static INLINE struct lp_build_tgsi_aos_context *
+static inline struct lp_build_tgsi_aos_context *
 lp_aos_context(struct lp_build_tgsi_context *bld_base)
 {
    return (struct lp_build_tgsi_aos_context *)bld_base;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 9cb42b237b7..0ad78b0ace2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -894,6 +894,125 @@ const struct lp_build_tgsi_action xpd_action = {
    xpd_emit	 /* emit */
 };
 
+/* TGSI_OPCODE_D2F: FPTrunc of args[0] to the base (float) vector type */
+static void
+d2f_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFPTrunc(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                       bld_base->base.vec_type, "");
+}
+
+/* TGSI_OPCODE_D2I: FPToSI (signed) of args[0] to base.int_vec_type */
+static void
+d2i_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFPToSI(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->base.int_vec_type, "");
+}
+
+/* TGSI_OPCODE_D2U: FPToUI (unsigned) of args[0] to base.int_vec_type */
+static void
+d2u_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFPToUI(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->base.int_vec_type, "");
+}
+
+/* TGSI_OPCODE_F2D: FPExt of args[0] to the double context's vector type */
+static void
+f2d_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildFPExt(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->dbl_bld.vec_type, "");
+}
+
+/* TGSI_OPCODE_U2D: UIToFP (unsigned) of args[0] to dbl_bld.vec_type */
+static void
+u2d_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildUIToFP(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->dbl_bld.vec_type, "");
+}
+
+/* TGSI_OPCODE_I2D: SIToFP (signed) of args[0] to dbl_bld.vec_type */
+static void
+i2d_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] =
+      LLVMBuildSIToFP(bld_base->base.gallivm->builder,
+                      emit_data->args[0],
+                      bld_base->dbl_bld.vec_type, "");
+}
+
+/* TGSI_OPCODE_DMAD: args[0] * args[1] + args[2], emitted as DMUL then DADD */
+static void
+dmad_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef tmp;
+   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DMUL,
+                                   emit_data->args[0],
+                                   emit_data->args[1]);
+   emit_data->output[emit_data->chan] = lp_build_emit_llvm_binary(bld_base,
+                                       TGSI_OPCODE_DADD, tmp, emit_data->args[2]);
+}
+
+/* TGSI_OPCODE_DRCP: reciprocal, emitted as FDiv of a 1.0 double vector by args[0] */
+static void drcp_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef one;
+   one = lp_build_const_vec(bld_base->dbl_bld.gallivm, bld_base->dbl_bld.type, 1.0f);
+   emit_data->output[emit_data->chan] = LLVMBuildFDiv(
+      bld_base->base.gallivm->builder,
+      one, emit_data->args[0], "");
+}
+
+/* TGSI_OPCODE_DFRAC: fractional part, emitted as args[0] - floor(args[0]) */
+static void dfrac_emit(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   LLVMValueRef tmp;
+   tmp = lp_build_floor(&bld_base->dbl_bld,
+			emit_data->args[0]);
+   emit_data->output[emit_data->chan] =  LLVMBuildFSub(bld_base->base.gallivm->builder,
+                                                       emit_data->args[0], tmp, "");
+}
+
 void
 lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
 {
@@ -948,6 +1067,25 @@ lp_set_default_actions(struct lp_build_tgsi_context * bld_base)
 
    bld_base->op_actions[TGSI_OPCODE_MAX].emit = fmax_emit;
    bld_base->op_actions[TGSI_OPCODE_MIN].emit = fmin_emit;
+
+   bld_base->op_actions[TGSI_OPCODE_DADD].emit = add_emit;
+   bld_base->op_actions[TGSI_OPCODE_DMAX].emit = fmax_emit;
+   bld_base->op_actions[TGSI_OPCODE_DMIN].emit = fmin_emit;
+   bld_base->op_actions[TGSI_OPCODE_DMUL].emit = mul_emit;
+
+   bld_base->op_actions[TGSI_OPCODE_D2F].emit = d2f_emit;
+   bld_base->op_actions[TGSI_OPCODE_D2I].emit = d2i_emit;
+   bld_base->op_actions[TGSI_OPCODE_D2U].emit = d2u_emit;
+
+   bld_base->op_actions[TGSI_OPCODE_F2D].emit = f2d_emit;
+   bld_base->op_actions[TGSI_OPCODE_I2D].emit = i2d_emit;
+   bld_base->op_actions[TGSI_OPCODE_U2D].emit = u2d_emit;
+
+   bld_base->op_actions[TGSI_OPCODE_DMAD].emit = dmad_emit;
+
+   bld_base->op_actions[TGSI_OPCODE_DRCP].emit = drcp_emit;
+   bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = dfrac_emit;
+
 }
 
 /* CPU Only default actions */
@@ -1792,6 +1930,107 @@ xor_emit_cpu(
                                                      emit_data->args[1]);
 }
 
+/* TGSI_OPCODE_DABS (CPU Only): lp_build_abs of args[0] in the double context */
+static void
+dabs_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_abs(&bld_base->dbl_bld,
+                                                       emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_DNEG (CPU Only): negation emitted as (dbl_bld.zero - args[0]) */
+static void
+dneg_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_sub(&bld_base->dbl_bld,
+                                                     bld_base->dbl_bld.zero,
+                                                     emit_data->args[0]); /* NOTE(review): 0 - (+0) stays +0, unlike a sign flip; confirm OK */
+}
+
+/* TGSI_OPCODE_DSET Helper (CPU Only): compares args[0] with args[1] using pipe_func */
+static void
+dset_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data,
+   unsigned pipe_func)
+{
+   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+   LLVMValueRef cond = lp_build_cmp(&bld_base->dbl_bld, pipe_func,
+                                    emit_data->args[0], emit_data->args[1]);
+   /* the compare was done in the double context; truncate the result to the int context's vector type */
+   cond = LLVMBuildTrunc(builder, cond, bld_base->int_bld.int_vec_type, "");
+   emit_data->output[emit_data->chan] = cond;
+}
+
+/* TGSI_OPCODE_DSEQ (CPU Only): dset_emit_cpu with PIPE_FUNC_EQUAL */
+static void
+dseq_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_EQUAL);
+}
+
+/* TGSI_OPCODE_DSGE (CPU Only): dset_emit_cpu with PIPE_FUNC_GEQUAL */
+static void
+dsge_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_GEQUAL);
+}
+
+/* TGSI_OPCODE_DSLT (CPU Only): dset_emit_cpu with PIPE_FUNC_LESS */
+static void
+dslt_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_LESS);
+}
+
+/* TGSI_OPCODE_DSNE (CPU Only): dset_emit_cpu with PIPE_FUNC_NOTEQUAL */
+static void
+dsne_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   dset_emit_cpu(action, bld_base, emit_data, PIPE_FUNC_NOTEQUAL);
+}
+
+/* TGSI_OPCODE_DRSQ (CPU Only): double reciprocal square root via lp_build_rsqrt */
+static void
+drecip_sqrt_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_rsqrt(&bld_base->dbl_bld,
+                                                         emit_data->args[0]);
+}
+
+/* TGSI_OPCODE_DSQRT (CPU Only): double square root via lp_build_sqrt */
+static void
+dsqrt_emit_cpu(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   emit_data->output[emit_data->chan] = lp_build_sqrt(&bld_base->dbl_bld,
+                                                      emit_data->args[0]);
+}
+
 void
 lp_set_default_actions_cpu(
    struct lp_build_tgsi_context * bld_base)
@@ -1864,4 +2103,14 @@ lp_set_default_actions_cpu(
 
    bld_base->op_actions[TGSI_OPCODE_XOR].emit = xor_emit_cpu;
 
+   bld_base->op_actions[TGSI_OPCODE_DABS].emit = dabs_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DNEG].emit = dneg_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = dseq_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DSGE].emit = dsge_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;
+
+   bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
+   bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;
+
 }
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
index fc7fdbdd231..463d44eb450 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
@@ -71,6 +71,11 @@ struct lp_build_emit_data {
     */
    unsigned chan;
 
+   /**
+    * This is used to specify the src channel to read from for doubles.
+    */
+   unsigned src_chan;
+
    /** The lp_build_tgsi_action::emit 'executes' the opcode and writes the
     * results to this array.
     */
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
index 55acea83799..906a1745551 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c
@@ -462,7 +462,7 @@ analyse_instruction(struct analysis_context *ctx,
 }
 
 
-static INLINE void
+static inline void
 dump_info(const struct tgsi_token *tokens,
           struct lp_tgsi_info *info)
 {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 268379e7d13..fae604e2f9c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -106,7 +106,7 @@ emit_dump_reg(struct gallivm_state *gallivm,
  * Return the context for the current function.
  * (always 'main', if shader doesn't do any function calls)
  */
-static INLINE struct function_ctx *
+static inline struct function_ctx *
 func_ctx(struct lp_exec_mask *mask)
 {
    assert(mask->function_stack_size > 0);
@@ -120,7 +120,7 @@ func_ctx(struct lp_exec_mask *mask)
  * no loop inside the current function, but we were inside
  * a loop inside another function, from which this one was called.
  */
-static INLINE boolean
+static inline boolean
 mask_has_loop(struct lp_exec_mask *mask)
 {
    int i;
@@ -138,7 +138,7 @@ mask_has_loop(struct lp_exec_mask *mask)
  * no switch in the current function, but we were inside
  * a switch inside another function, from which this one was called.
  */
-static INLINE boolean
+static inline boolean
 mask_has_switch(struct lp_exec_mask *mask)
 {
    int i;
@@ -156,7 +156,7 @@ mask_has_switch(struct lp_exec_mask *mask)
  * no conditional in the current function, but we were inside
  * a conditional inside another function, from which this one was called.
  */
-static INLINE boolean
+static inline boolean
 mask_has_cond(struct lp_exec_mask *mask)
 {
    int i;
@@ -947,15 +947,20 @@ static LLVMValueRef
 build_gather(struct lp_build_tgsi_context *bld_base,
              LLVMValueRef base_ptr,
              LLVMValueRef indexes,
-             LLVMValueRef overflow_mask)
+             LLVMValueRef overflow_mask,
+             LLVMValueRef indexes2)
 {
    struct gallivm_state *gallivm = bld_base->base.gallivm;
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_build_context *uint_bld = &bld_base->uint_bld;
    struct lp_build_context *bld = &bld_base->base;
-   LLVMValueRef res = bld->undef;
+   LLVMValueRef res;
    unsigned i;
 
+   if (indexes2)
+      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
+   else
+      res = bld->undef;
    /*
     * overflow_mask is a vector telling us which channels
     * in the vector overflowed. We use the overflow behavior for
@@ -976,26 +981,47 @@ build_gather(struct lp_build_tgsi_context *bld_base,
        * control flow.
        */
       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
+      if (indexes2)
+         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
    }
 
    /*
     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
     */
-   for (i = 0; i < bld->type.length; i++) {
-      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
-      LLVMValueRef index = LLVMBuildExtractElement(builder,
-                                                   indexes, ii, "");
+   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
+      LLVMValueRef si, di;
+      LLVMValueRef index;
       LLVMValueRef scalar_ptr, scalar;
 
+      di = lp_build_const_int32(bld->gallivm, i);
+      if (indexes2)
+         si = lp_build_const_int32(bld->gallivm, i >> 1);
+      else
+         si = di;
+
+      if (indexes2 && (i & 1)) {
+         index = LLVMBuildExtractElement(builder,
+                                         indexes2, si, "");
+      } else {
+         index = LLVMBuildExtractElement(builder,
+                                         indexes, si, "");
+      }
       scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                 &index, 1, "gather_ptr");
       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
 
-      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
+      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
    }
 
    if (overflow_mask) {
-      res = lp_build_select(bld, overflow_mask, bld->zero, res);
+      if (indexes2) {
+         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
+         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
+                                       bld_base->dbl_bld.int_vec_type, "");
+         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
+                               bld_base->dbl_bld.zero, res);
+      } else
+         res = lp_build_select(bld, overflow_mask, bld->zero, res);
    }
 
    return res;
@@ -1139,8 +1165,10 @@ stype_to_fetch(struct lp_build_tgsi_context * bld_base,
    case TGSI_TYPE_SIGNED:
       bld_fetch = &bld_base->int_bld;
       break;
-   case TGSI_TYPE_VOID:
    case TGSI_TYPE_DOUBLE:
+      bld_fetch = &bld_base->dbl_bld;
+      break;
+   case TGSI_TYPE_VOID:
    default:
       assert(0);
       bld_fetch = NULL;
@@ -1216,6 +1244,7 @@ emit_fetch_constant(
          lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
       LLVMValueRef index_vec;  /* index into the const buffer */
       LLVMValueRef overflow_mask;
+      LLVMValueRef index_vec2 = NULL;
 
       indirect_index = get_indirect_index(bld,
                                           reg->Register.File,
@@ -1235,22 +1264,33 @@ emit_fetch_constant(
       index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
       index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
 
+      if (stype == TGSI_TYPE_DOUBLE) {
+         LLVMValueRef swizzle_vec2;
+         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
+         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
+         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
+      }
       /* Gather values from the constant buffer */
-      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask);
+      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
    }
    else {
       LLVMValueRef index;  /* index into the const buffer */
       LLVMValueRef scalar, scalar_ptr;
-
+      struct lp_build_context *bld_broad = &bld_base->base;
       index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
 
       scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                 &index, 1, "");
+      if (stype == TGSI_TYPE_DOUBLE) {
+         LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
+         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
+         bld_broad = &bld_base->dbl_bld;
+      }
       scalar = LLVMBuildLoad(builder, scalar_ptr, "");
-      res = lp_build_broadcast_scalar(&bld_base->base, scalar);
+      res = lp_build_broadcast_scalar(bld_broad, scalar);
    }
 
-   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
+   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
    }
@@ -1258,6 +1298,39 @@ emit_fetch_constant(
    return res;
 }
 
+/**
+ * Fetch double values from two separate channels.
+ * Doubles are stored split across two channels, like xy and zw.
+ * The two channel vectors are interleaved element by element with a
+ * shuffle (input[0], input2[0], input[1], input2[1], ...), producing
+ * 2 * base.type.length elements (at most 16), and the result is then
+ * bitcast to the vector type of the build context selected by stype.
+ */
+static LLVMValueRef
+emit_fetch_double(
+   struct lp_build_tgsi_context * bld_base,
+   enum tgsi_opcode_type stype,
+   LLVMValueRef input,
+   LLVMValueRef input2)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef res;
+   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
+   int i;
+   LLVMValueRef shuffles[16];
+   int len = bld_base->base.type.length * 2;
+   assert(len <= 16); /* shuffles[] only has room for 16 indices */
+
+   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
+      shuffles[i] = lp_build_const_int32(gallivm, i / 2); /* even slot: element i/2 of input */
+      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length); /* odd slot: element i/2 of input2 */
+   }
+   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
+
+   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
+}
+
 static LLVMValueRef
 emit_fetch_immediate(
    struct lp_build_tgsi_context * bld_base,
@@ -1281,7 +1354,7 @@ emit_fetch_immediate(
       if (reg->Register.Indirect) {
          LLVMValueRef indirect_index;
          LLVMValueRef index_vec;  /* index into the immediate register array */
-
+         LLVMValueRef index_vec2 = NULL;
          indirect_index = get_indirect_index(bld,
                                              reg->Register.File,
                                              reg->Register.Index,
@@ -1296,25 +1369,46 @@ emit_fetch_immediate(
                                            indirect_index,
                                            swizzle,
                                            FALSE);
-
+         if (stype == TGSI_TYPE_DOUBLE)
+            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+                                              indirect_index,
+                                              swizzle + 1,
+                                              FALSE);
          /* Gather values from the immediate register array */
-         res = build_gather(bld_base, imms_array, index_vec, NULL);
+         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
       } else {
          LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                         reg->Register.Index * 4 + swizzle);
          LLVMValueRef imms_ptr =  LLVMBuildGEP(builder,
                                                 bld->imms_array, &lindex, 1, "");
          res = LLVMBuildLoad(builder, imms_ptr, "");
+
+         if (stype == TGSI_TYPE_DOUBLE) {
+            LLVMValueRef lindex1;
+            LLVMValueRef imms_ptr2;
+            LLVMValueRef res2;
+
+            lindex1 = lp_build_const_int32(gallivm,
+                                           reg->Register.Index * 4 + swizzle + 1);
+            imms_ptr2 = LLVMBuildGEP(builder,
+                                      bld->imms_array, &lindex1, 1, "");
+            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
+            res = emit_fetch_double(bld_base, stype, res, res2);
+         }
       }
    }
    else {
       res = bld->immediates[reg->Register.Index][swizzle];
+      if (stype == TGSI_TYPE_DOUBLE)
+         res = emit_fetch_double(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
    }
 
    if (stype == TGSI_TYPE_UNSIGNED) {
       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
    } else if (stype == TGSI_TYPE_SIGNED) {
       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+   } else if (stype == TGSI_TYPE_DOUBLE) {
+      res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
    }
    return res;
 }
@@ -1334,6 +1428,7 @@ emit_fetch_input(
    if (reg->Register.Indirect) {
       LLVMValueRef indirect_index;
       LLVMValueRef index_vec;  /* index into the input reg array */
+      LLVMValueRef index_vec2 = NULL;
       LLVMValueRef inputs_array;
       LLVMTypeRef fptr_type;
 
@@ -1346,23 +1441,43 @@ emit_fetch_input(
                                         indirect_index,
                                         swizzle,
                                         TRUE);
-
+      if (stype == TGSI_TYPE_DOUBLE) {
+         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+                                           indirect_index,
+                                           swizzle + 1,
+                                           TRUE);
+      }
       /* cast inputs_array pointer to float* */
       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
       inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");
 
       /* Gather values from the input register array */
-      res = build_gather(bld_base, inputs_array, index_vec, NULL);
+      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
    } else {
       if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
          LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                         reg->Register.Index * 4 + swizzle);
-         LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
-                                                bld->inputs_array, &lindex, 1, "");
+         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
+                                               bld->inputs_array, &lindex, 1, "");
+
          res = LLVMBuildLoad(builder, input_ptr, "");
+         if (stype == TGSI_TYPE_DOUBLE) {
+            LLVMValueRef lindex1;
+            LLVMValueRef input_ptr2;
+            LLVMValueRef res2;
+
+            lindex1 = lp_build_const_int32(gallivm,
+                                           reg->Register.Index * 4 + swizzle + 1);
+            input_ptr2 = LLVMBuildGEP(builder,
+                                      bld->inputs_array, &lindex1, 1, "");
+            res2 = LLVMBuildLoad(builder, input_ptr2, "");
+            res = emit_fetch_double(bld_base, stype, res, res2);
+         }
       }
       else {
          res = bld->inputs[reg->Register.Index][swizzle];
+         if (stype == TGSI_TYPE_DOUBLE)
+            res = emit_fetch_double(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
       }
    }
 
@@ -1372,6 +1487,8 @@ emit_fetch_input(
       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
    } else if (stype == TGSI_TYPE_SIGNED) {
       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+   } else if (stype == TGSI_TYPE_DOUBLE) {
+      res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
    }
 
    return res;
@@ -1413,7 +1530,7 @@ emit_fetch_gs_input(
    } else {
       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
    }
-   
+
    if (reg->Dimension.Indirect) {
       vertex_index = get_indirect_index(bld,
                                         reg->Register.File,
@@ -1436,6 +1553,8 @@ emit_fetch_gs_input(
       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
    } else if (stype == TGSI_TYPE_SIGNED) {
       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
+   } else if (stype == TGSI_TYPE_DOUBLE) {
+      res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
    }
 
    return res;
@@ -1455,7 +1574,7 @@ emit_fetch_temporary(
 
    if (reg->Register.Indirect) {
       LLVMValueRef indirect_index;
-      LLVMValueRef index_vec;  /* index into the temp reg array */
+      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
       LLVMValueRef temps_array;
       LLVMTypeRef fptr_type;
 
@@ -1468,21 +1587,35 @@ emit_fetch_temporary(
                                         indirect_index,
                                         swizzle,
                                         TRUE);
+      if (stype == TGSI_TYPE_DOUBLE) {
+               index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
+                                                  indirect_index,
+                                                  swizzle + 1,
+                                                  TRUE);
+      }
 
       /* cast temps_array pointer to float* */
       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
 
       /* Gather values from the temporary register array */
-      res = build_gather(bld_base, temps_array, index_vec, NULL);
+      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
    }
    else {
       LLVMValueRef temp_ptr;
       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
       res = LLVMBuildLoad(builder, temp_ptr, "");
+
+      if (stype == TGSI_TYPE_DOUBLE) {
+         LLVMValueRef temp_ptr2, res2;
+
+         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
+         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
+         res = emit_fetch_double(bld_base, stype, res, res2);
+      }
    }
 
-   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
+   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE) {
       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
    }
@@ -1648,6 +1781,50 @@ emit_fetch_predicate(
    }
 }
 
+/**
+ * Split a vector of doubles into two float vectors and store them, i.e.
+ * value is d0, d1, d2, d3 etc. (assumes the caller has already bitcast
+ * it to a float vector of 2 * base.type.length elements - TODO confirm).
+ * each double has two 32-bit pieces x, y
+ * so gets stored into the separate channels as:
+ * chan_ptr = d0.x, d1.x, d2.x, d3.x
+ * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
+ */
+static void
+emit_store_double_chan(struct lp_build_tgsi_context *bld_base,
+                       int dtype, /* not referenced in this function */
+                       LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
+                       LLVMValueRef pred,
+                       LLVMValueRef value)
+{
+   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context *float_bld = &bld_base->base;
+   int i;
+   LLVMValueRef temp, temp2;
+   LLVMValueRef shuffles[8]; /* NOTE(review): fixed size, no assert here; assumes base.type.length <= 8 */
+   LLVMValueRef shuffles2[8]; /* NOTE(review): same length <= 8 assumption as shuffles[] */
+
+   for (i = 0; i < bld_base->base.type.length; i++) {
+      shuffles[i] = lp_build_const_int32(gallivm, i * 2); /* even elements of value -> chan_ptr */
+      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1); /* odd elements of value -> chan_ptr2 */
+   }
+
+   temp = LLVMBuildShuffleVector(builder, value,
+                                 LLVMGetUndef(LLVMTypeOf(value)),
+                                 LLVMConstVector(shuffles,
+                                                 bld_base->base.type.length),
+                                 "");
+   temp2 = LLVMBuildShuffleVector(builder, value,
+                                  LLVMGetUndef(LLVMTypeOf(value)),
+                                  LLVMConstVector(shuffles2,
+                                                  bld_base->base.type.length),
+                                  "");
+
+   lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp, chan_ptr); /* both halves use the same pred */
+   lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp2, chan_ptr2);
+}
 
 /**
  * Register store.
@@ -1683,6 +1860,11 @@ emit_store_chan(
    }
 
    if (reg->Register.Indirect) {
+      /*
+       * Currently the mesa/st doesn't generate indirect stores
+       * to doubles, it normally uses MOV to do indirect stores.
+       */
+      assert(dtype != TGSI_TYPE_DOUBLE);
       indirect_index = get_indirect_index(bld,
                                           reg->Register.File,
                                           reg->Register.Index,
@@ -1721,13 +1903,23 @@ emit_store_chan(
       else {
          LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                   chan_index);
-         lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
+
+         if (dtype == TGSI_TYPE_DOUBLE) {
+            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
+                                                      chan_index + 1);
+            emit_store_double_chan(bld_base, dtype, out_ptr, out_ptr2,
+                                   pred, value);
+         } else
+            lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
       }
       break;
 
    case TGSI_FILE_TEMPORARY:
       /* Temporaries are always stored as floats */
-      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+      if (dtype != TGSI_TYPE_DOUBLE)
+         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
+      else
+         value = LLVMBuildBitCast(builder, value,  LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
 
       if (reg->Register.Indirect) {
          LLVMValueRef index_vec;  /* indexes into the temp registers */
@@ -1749,7 +1941,16 @@ emit_store_chan(
       else {
          LLVMValueRef temp_ptr;
          temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
-         lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
+
+         if (dtype == TGSI_TYPE_DOUBLE) {
+            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
+                                                         reg->Register.Index,
+                                                         chan_index + 1);
+            emit_store_double_chan(bld_base, dtype, temp_ptr, temp_ptr2,
+                                   pred, value);
+         }
+         else
+            lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
       }
       break;
 
@@ -1818,13 +2019,16 @@ emit_store(
 {
    unsigned chan_index;
    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
-
+   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
    if(info->num_dst) {
       LLVMValueRef pred[TGSI_NUM_CHANNELS];
 
       emit_fetch_predicate( bld, inst, pred );
 
       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
+
+         if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3))
+             continue;
          emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
       }
    }
@@ -2823,6 +3027,7 @@ void lp_emit_immediate_soa(
                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
 
       break;
+   case TGSI_IMM_FLOAT64:
    case TGSI_IMM_UINT32:
       for( i = 0; i < size; ++i ) {
          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
@@ -2857,8 +3062,7 @@ void lp_emit_immediate_soa(
    } else {
       /* simply copy the immediate values into the next immediates[] slot */
       unsigned i;
-      const uint size = imm->Immediate.NrTokens - 1;
-      assert(size <= 4);
+      assert(imm->Immediate.NrTokens - 1 <= 4);
       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
 
       for(i = 0; i < 4; ++i )
@@ -3674,6 +3878,12 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
+   {
+      struct lp_type dbl_type;
+      dbl_type = type;
+      dbl_type.width *= 2;
+      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
+   }
    bld.mask = mask;
    bld.inputs = inputs;
    bld.outputs = outputs;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_type.h b/src/gallium/auxiliary/gallivm/lp_bld_type.h
index 191cf92d2d1..7fb449fd03f 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_type.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_type.h
@@ -173,7 +173,7 @@ struct lp_build_context
  *
  * e.g. With PIPE_FORMAT_R32G32B32A32_FLOAT returns an lp_type with float[4]
  */
-static INLINE void
+static inline void
 lp_type_from_format_desc(struct lp_type* type, const struct util_format_description *format_desc)
 {
    assert(format_desc->is_array);
@@ -189,14 +189,14 @@ lp_type_from_format_desc(struct lp_type* type, const struct util_format_descript
 }
 
 
-static INLINE void
+static inline void
 lp_type_from_format(struct lp_type* type, enum pipe_format format)
 {
    lp_type_from_format_desc(type, util_format_description(format));
 }
 
 
-static INLINE unsigned
+static inline unsigned
 lp_type_width(struct lp_type type)
 {
    return type.width * type.length;
@@ -204,7 +204,7 @@ lp_type_width(struct lp_type type)
 
 
 /** Create scalar float type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_float(unsigned width)
 {
    struct lp_type res_type;
@@ -220,7 +220,7 @@ lp_type_float(unsigned width)
 
 
 /** Create vector of float type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_float_vec(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -236,7 +236,7 @@ lp_type_float_vec(unsigned width, unsigned total_width)
 
 
 /** Create scalar int type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_int(unsigned width)
 {
    struct lp_type res_type;
@@ -251,7 +251,7 @@ lp_type_int(unsigned width)
 
 
 /** Create vector int type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_int_vec(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -266,7 +266,7 @@ lp_type_int_vec(unsigned width, unsigned total_width)
 
 
 /** Create scalar uint type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_uint(unsigned width)
 {
    struct lp_type res_type;
@@ -280,7 +280,7 @@ lp_type_uint(unsigned width)
 
 
 /** Create vector uint type */
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_uint_vec(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -293,7 +293,7 @@ lp_type_uint_vec(unsigned width, unsigned total_width)
 }
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_unorm(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -307,7 +307,7 @@ lp_type_unorm(unsigned width, unsigned total_width)
 }
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_fixed(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -322,7 +322,7 @@ lp_type_fixed(unsigned width, unsigned total_width)
 }
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_type_ufixed(unsigned width, unsigned total_width)
 {
    struct lp_type res_type;
@@ -364,7 +364,7 @@ LLVMTypeRef
 lp_build_int_vec_type(struct gallivm_state *gallivm, struct lp_type type);
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_float32_vec4_type(void)
 {
    struct lp_type type;
@@ -380,7 +380,7 @@ lp_float32_vec4_type(void)
 }
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_int32_vec4_type(void)
 {
    struct lp_type type;
@@ -396,7 +396,7 @@ lp_int32_vec4_type(void)
 }
 
 
-static INLINE struct lp_type
+static inline struct lp_type
 lp_unorm8_vec4_type(void)
 {
    struct lp_type type;
diff --git a/src/gallium/auxiliary/hud/hud_context.c b/src/gallium/auxiliary/hud/hud_context.c
index 6a124f7d716..95eed2698bc 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -231,18 +231,53 @@ hud_draw_string(struct hud_context *hud, unsigned x, unsigned y,
 }
 
 static void
-number_to_human_readable(uint64_t num, boolean is_in_bytes, char *out)
+number_to_human_readable(uint64_t num, uint64_t max_value,
+                         enum pipe_driver_query_type type, char *out)
 {
    static const char *byte_units[] =
-      {"", " KB", " MB", " GB", " TB", " PB", " EB"};
+      {" B", " KB", " MB", " GB", " TB", " PB", " EB"};
    static const char *metric_units[] =
       {"", " k", " M", " G", " T", " P", " E"};
-   const char **units = is_in_bytes ? byte_units : metric_units;
-   double divisor = is_in_bytes ? 1024 : 1000;
-   int unit = 0;
+   static const char *time_units[] =
+      {" us", " ms", " s"};  /* based on microseconds */
+   static const char *hz_units[] =
+      {" Hz", " KHz", " MHz", " GHz"};
+   static const char *percent_units[] = {"%"};
+
+   const char **units;
+   unsigned max_unit;
+   double divisor = (type == PIPE_DRIVER_QUERY_TYPE_BYTES) ? 1024 : 1000;
+   unsigned unit = 0;
    double d = num;
 
-   while (d > divisor) {
+   switch (type) {
+   case PIPE_DRIVER_QUERY_TYPE_MICROSECONDS:
+      max_unit = ARRAY_SIZE(time_units)-1;
+      units = time_units;
+      break;
+   case PIPE_DRIVER_QUERY_TYPE_PERCENTAGE:
+      max_unit = ARRAY_SIZE(percent_units)-1;
+      units = percent_units;
+      break;
+   case PIPE_DRIVER_QUERY_TYPE_BYTES:
+      max_unit = ARRAY_SIZE(byte_units)-1;
+      units = byte_units;
+      break;
+   case PIPE_DRIVER_QUERY_TYPE_HZ:
+      max_unit = ARRAY_SIZE(hz_units)-1;
+      units = hz_units;
+      break;
+   default:
+      if (max_value == 100) {
+         max_unit = ARRAY_SIZE(percent_units)-1;
+         units = percent_units;
+      } else {
+         max_unit = ARRAY_SIZE(metric_units)-1;
+         units = metric_units;
+      }
+   }
+
+   while (d > divisor && unit < max_unit) {
       d /= divisor;
       unit++;
    }
@@ -300,9 +335,9 @@ hud_pane_accumulate_vertices(struct hud_context *hud,
       unsigned y = pane->inner_y1 + pane->inner_height * (5 - i) / 5 -
                    hud->font.glyph_height / 2;
 
-      number_to_human_readable(pane->max_value * i / 5,
-                               pane->uses_byte_units, str);
-      hud_draw_string(hud, x, y, str);
+      number_to_human_readable(pane->max_value * i / 5, pane->max_value,
+                               pane->type, str);
+      hud_draw_string(hud, x, y, "%s", str);
    }
 
    /* draw info below the pane */
@@ -311,8 +346,8 @@ hud_pane_accumulate_vertices(struct hud_context *hud,
       unsigned x = pane->x1 + 2;
       unsigned y = pane->y2 + 2 + i*hud->font.glyph_height;
 
-      number_to_human_readable(gr->current_value,
-                               pane->uses_byte_units, str);
+      number_to_human_readable(gr->current_value, pane->max_value,
+                               pane->type, str);
       hud_draw_string(hud, x, y, "  %s: %s", gr->name, str);
       i++;
    }
@@ -417,8 +452,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
    cso_save_blend(cso);
    cso_save_depth_stencil_alpha(cso);
    cso_save_fragment_shader(cso);
-   cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT);
-   cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
+   cso_save_fragment_sampler_views(cso);
+   cso_save_fragment_samplers(cso);
    cso_save_rasterizer(cso);
    cso_save_viewport(cso);
    cso_save_stream_outputs(cso);
@@ -547,8 +582,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
    cso_restore_blend(cso);
    cso_restore_depth_stencil_alpha(cso);
    cso_restore_fragment_shader(cso);
-   cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT);
-   cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
+   cso_restore_fragment_sampler_views(cso);
+   cso_restore_fragment_samplers(cso);
    cso_restore_rasterizer(cso);
    cso_restore_viewport(cso);
    cso_restore_stream_outputs(cso);
@@ -869,12 +904,16 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
       else if (strcmp(name, "samples-passed") == 0 &&
                has_occlusion_query(hud->pipe->screen)) {
          hud_pipe_query_install(pane, hud->pipe, "samples-passed",
-                                PIPE_QUERY_OCCLUSION_COUNTER, 0, 0, FALSE);
+                                PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
+                                PIPE_DRIVER_QUERY_TYPE_UINT64,
+                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
       }
       else if (strcmp(name, "primitives-generated") == 0 &&
                has_streamout(hud->pipe->screen)) {
          hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
-                                PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0, FALSE);
+                                PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
+                                PIPE_DRIVER_QUERY_TYPE_UINT64,
+                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
       }
       else {
          boolean processed = FALSE;
@@ -901,7 +940,8 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
             if (i < Elements(pipeline_statistics_names)) {
                hud_pipe_query_install(pane, hud->pipe, name,
                                       PIPE_QUERY_PIPELINE_STATISTICS, i,
-                                      0, FALSE);
+                                      0, PIPE_DRIVER_QUERY_TYPE_UINT64,
+                                      PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
                processed = TRUE;
             }
          }
diff --git a/src/gallium/auxiliary/hud/hud_driver_query.c b/src/gallium/auxiliary/hud/hud_driver_query.c
index 603aba7e8cd..f14305ea835 100644
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -43,6 +43,7 @@ struct query_info {
    struct pipe_context *pipe;
    unsigned query_type;
    unsigned result_index; /* unit depends on query_type */
+   enum pipe_driver_query_result_type result_type;
 
    /* Ring of queries. If a query is busy, we use another slot. */
    struct pipe_query *query[NUM_QUERIES];
@@ -62,7 +63,8 @@ query_new_value(struct hud_graph *gr)
    uint64_t now = os_time_get();
 
    if (info->last_time) {
-      pipe->end_query(pipe, info->query[info->head]);
+      if (info->query[info->head])
+         pipe->end_query(pipe, info->query[info->head]);
 
       /* read query results */
       while (1) {
@@ -70,7 +72,7 @@ query_new_value(struct hud_graph *gr)
          union pipe_query_result result;
          uint64_t *res64 = (uint64_t *)&result;
 
-         if (pipe->get_query_result(pipe, query, FALSE, &result)) {
+         if (query && pipe->get_query_result(pipe, query, FALSE, &result)) {
             info->results_cumulative += res64[info->result_index];
             info->num_results++;
 
@@ -88,7 +90,8 @@ query_new_value(struct hud_graph *gr)
                        "gallium_hud: all queries are busy after %i frames, "
                        "can't add another query\n",
                        NUM_QUERIES);
-               pipe->destroy_query(pipe, info->query[info->head]);
+               if (info->query[info->head])
+                  pipe->destroy_query(pipe, info->query[info->head]);
                info->query[info->head] =
                      pipe->create_query(pipe, info->query_type, 0);
             }
@@ -106,22 +109,33 @@ query_new_value(struct hud_graph *gr)
       }
 
       if (info->num_results && info->last_time + gr->pane->period <= now) {
-         /* compute the average value across all frames */
-         hud_graph_add_value(gr, info->results_cumulative / info->num_results);
+         uint64_t value;
+
+         switch (info->result_type) {
+         default:
+         case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE:
+            value = info->results_cumulative / info->num_results;
+            break;
+         case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE:
+            value = info->results_cumulative;
+            break;
+         }
+
+         hud_graph_add_value(gr, value);
 
          info->last_time = now;
          info->results_cumulative = 0;
          info->num_results = 0;
       }
-
-      pipe->begin_query(pipe, info->query[info->head]);
    }
    else {
       /* initialize */
       info->last_time = now;
       info->query[info->head] = pipe->create_query(pipe, info->query_type, 0);
-      pipe->begin_query(pipe, info->query[info->head]);
    }
+
+   if (info->query[info->head])
+      pipe->begin_query(pipe, info->query[info->head]);
 }
 
 static void
@@ -148,7 +162,8 @@ void
 hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
                        const char *name, unsigned query_type,
                        unsigned result_index,
-                       uint64_t max_value, boolean uses_byte_units)
+                       uint64_t max_value, enum pipe_driver_query_type type,
+                       enum pipe_driver_query_result_type result_type)
 {
    struct hud_graph *gr;
    struct query_info *info;
@@ -172,12 +187,12 @@ hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
    info->pipe = pipe;
    info->query_type = query_type;
    info->result_index = result_index;
+   info->result_type = result_type;
 
    hud_pane_add_graph(pane, gr);
    if (pane->max_value < max_value)
       hud_pane_set_max_value(pane, max_value);
-   if (uses_byte_units)
-      pane->uses_byte_units = TRUE;
+   pane->type = type;
 }
 
 boolean
@@ -187,7 +202,6 @@ hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe,
    struct pipe_screen *screen = pipe->screen;
    struct pipe_driver_query_info query;
    unsigned num_queries, i;
-   boolean uses_byte_units;
    boolean found = FALSE;
 
    if (!screen->get_driver_query_info)
@@ -206,9 +220,8 @@ hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe,
    if (!found)
       return FALSE;
 
-   uses_byte_units = query.type == PIPE_DRIVER_QUERY_TYPE_BYTES;
    hud_pipe_query_install(pane, pipe, query.name, query.query_type, 0,
-                          query.max_value.u64, uses_byte_units);
+                          query.max_value.u64, query.type, query.result_type);
 
    return TRUE;
 }
diff --git a/src/gallium/auxiliary/hud/hud_private.h b/src/gallium/auxiliary/hud/hud_private.h
index 632926b87f5..01caf7b8b2c 100644
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -66,7 +66,7 @@ struct hud_pane {
    uint64_t ceiling;
    unsigned dyn_ceil_last_ran;
    boolean dyn_ceiling;
-   boolean uses_byte_units;
+   enum pipe_driver_query_type type;
    uint64_t period; /* in microseconds */
 
    struct list_head graph_list;
@@ -89,7 +89,9 @@ void hud_cpu_graph_install(struct hud_pane *pane, unsigned cpu_index);
 void hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
                             const char *name, unsigned query_type,
                             unsigned result_index,
-                            uint64_t max_value, boolean uses_byte_units);
+                            uint64_t max_value,
+                            enum pipe_driver_query_type type,
+                            enum pipe_driver_query_result_type result_type);
 boolean hud_driver_query_install(struct hud_pane *pane,
                                  struct pipe_context *pipe, const char *name);
 
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 061f39ac6f3..93dfb803389 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -184,7 +184,8 @@ ttn_emit_declaration(struct ttn_compile *c)
          c->samp_types[decl->Range.First + i] = type;
       }
    } else {
-      nir_variable *var;
+      bool is_array = (array_size > 1);
+
       assert(file == TGSI_FILE_INPUT ||
              file == TGSI_FILE_OUTPUT ||
              file == TGSI_FILE_CONSTANT);
@@ -193,76 +194,99 @@ ttn_emit_declaration(struct ttn_compile *c)
       if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension)
          return;
 
-      var = rzalloc(b->shader, nir_variable);
-      var->data.driver_location = decl->Range.First;
+      if ((file == TGSI_FILE_INPUT) || (file == TGSI_FILE_OUTPUT)) {
+         is_array = (is_array && decl->Declaration.Array &&
+                     (decl->Array.ArrayID != 0));
+      }
 
-      var->type = glsl_vec4_type();
-      if (array_size > 1)
-         var->type = glsl_array_type(var->type, array_size);
+      for (i = 0; i < array_size; i++) {
+         unsigned idx = decl->Range.First + i;
+         nir_variable *var = rzalloc(b->shader, nir_variable);
 
-      switch (file) {
-      case TGSI_FILE_INPUT:
-         var->data.read_only = true;
-         var->data.mode = nir_var_shader_in;
-         var->name = ralloc_asprintf(var, "in_%d", decl->Range.First);
+         var->data.driver_location = idx;
 
-         /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
-          * instead, but nothing in NIR core is looking at the value
-          * currently, and this is less change to drivers.
-          */
-         var->data.location = decl->Semantic.Name;
-         var->data.index = decl->Semantic.Index;
+         var->type = glsl_vec4_type();
+         if (is_array)
+            var->type = glsl_array_type(var->type, array_size);
 
-         /* We definitely need to translate the interpolation field, because
-          * nir_print will decode it.
-          */
-         switch (decl->Interp.Interpolate) {
-         case TGSI_INTERPOLATE_CONSTANT:
-            var->data.interpolation = INTERP_QUALIFIER_FLAT;
-            break;
-         case TGSI_INTERPOLATE_LINEAR:
-            var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
-            break;
-         case TGSI_INTERPOLATE_PERSPECTIVE:
-            var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
-            break;
-         }
+         switch (file) {
+         case TGSI_FILE_INPUT:
+            var->data.read_only = true;
+            var->data.mode = nir_var_shader_in;
+            var->name = ralloc_asprintf(var, "in_%d", idx);
 
-         exec_list_push_tail(&b->shader->inputs, &var->node);
-         break;
-      case TGSI_FILE_OUTPUT: {
-         /* Since we can't load from outputs in the IR, we make temporaries
-          * for the outputs and emit stores to the real outputs at the end of
-          * the shader.
-          */
-         nir_register *reg = nir_local_reg_create(b->impl);
-         reg->num_components = 4;
-         if (array_size > 1)
-            reg->num_array_elems = array_size;
-
-         var->data.mode = nir_var_shader_out;
-         var->name = ralloc_asprintf(var, "out_%d", decl->Range.First);
-
-         var->data.location = decl->Semantic.Name;
-         var->data.index = decl->Semantic.Index;
+            /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
+             * instead, but nothing in NIR core is looking at the value
+             * currently, and this is less change to drivers.
+             */
+            var->data.location = decl->Semantic.Name;
+            var->data.index = decl->Semantic.Index;
 
-         for (i = 0; i < array_size; i++) {
-            c->output_regs[decl->Range.First + i].offset = i;
-            c->output_regs[decl->Range.First + i].reg = reg;
+            /* We definitely need to translate the interpolation field, because
+             * nir_print will decode it.
+             */
+            switch (decl->Interp.Interpolate) {
+            case TGSI_INTERPOLATE_CONSTANT:
+               var->data.interpolation = INTERP_QUALIFIER_FLAT;
+               break;
+            case TGSI_INTERPOLATE_LINEAR:
+               var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
+               break;
+            case TGSI_INTERPOLATE_PERSPECTIVE:
+               var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+               break;
+            }
+
+            exec_list_push_tail(&b->shader->inputs, &var->node);
+            break;
+         case TGSI_FILE_OUTPUT: {
+            /* Since we can't load from outputs in the IR, we make temporaries
+             * for the outputs and emit stores to the real outputs at the end of
+             * the shader.
+             */
+            nir_register *reg = nir_local_reg_create(b->impl);
+            reg->num_components = 4;
+            if (is_array)
+               reg->num_array_elems = array_size;
+
+            var->data.mode = nir_var_shader_out;
+            var->name = ralloc_asprintf(var, "out_%d", idx);
+
+            var->data.location = decl->Semantic.Name;
+            if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+                decl->Semantic.Index == 0 &&
+                c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+               var->data.index = -1;
+            else
+               var->data.index = decl->Semantic.Index;
+
+            if (is_array) {
+               unsigned j;
+               for (j = 0; j < array_size; j++) {
+                  c->output_regs[idx + j].offset = i + j;
+                  c->output_regs[idx + j].reg = reg;
+               }
+            } else {
+               c->output_regs[idx].offset = i;
+               c->output_regs[idx].reg = reg;
+            }
+
+            exec_list_push_tail(&b->shader->outputs, &var->node);
          }
+            break;
+         case TGSI_FILE_CONSTANT:
+            var->data.mode = nir_var_uniform;
+            var->name = ralloc_asprintf(var, "uniform_%d", idx);
 
-         exec_list_push_tail(&b->shader->outputs, &var->node);
-      }
-         break;
-      case TGSI_FILE_CONSTANT:
-         var->data.mode = nir_var_uniform;
-         var->name = ralloc_asprintf(var, "uniform_%d", decl->Range.First);
+            exec_list_push_tail(&b->shader->uniforms, &var->node);
+            break;
+         default:
+            unreachable("bad declaration file");
+            return;
+         }
 
-         exec_list_push_tail(&b->shader->uniforms, &var->node);
-         break;
-      default:
-         unreachable("bad declaration file");
-         return;
+         if (is_array)
+            break;
       }
 
    }
@@ -975,6 +999,9 @@ static void
 setup_texture_info(nir_tex_instr *instr, unsigned texture)
 {
    switch (texture) {
+   case TGSI_TEXTURE_BUFFER:
+      instr->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+      break;
    case TGSI_TEXTURE_1D:
       instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
       break;
@@ -1068,6 +1095,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
       op = nir_texop_txb;
       num_srcs = 2;
       break;
+   case TGSI_OPCODE_TXB2:
+      op = nir_texop_txb;
+      num_srcs = 2;
+      samp = 2;
+      break;
    case TGSI_OPCODE_TXL:
       op = nir_texop_txl;
       num_srcs = 2;
@@ -1078,7 +1110,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
       samp = 2;
       break;
    case TGSI_OPCODE_TXF:
-      op = nir_texop_txf;
+      if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
+          tgsi_inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) {
+         op = nir_texop_txf_ms;
+      } else {
+         op = nir_texop_txf;
+      }
       num_srcs = 2;
       break;
    case TGSI_OPCODE_TXD:
@@ -1164,6 +1201,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
       src_number++;
    }
 
+   if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
+      instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
+      instr->src[src_number].src_type = nir_tex_src_bias;
+      src_number++;
+   }
+
    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
       instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
       instr->src[src_number].src_type = nir_tex_src_lod;
@@ -1178,7 +1221,10 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
 
    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
       instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
-      instr->src[src_number].src_type = nir_tex_src_lod;
+      if (op == nir_texop_txf_ms)
+         instr->src[src_number].src_type = nir_tex_src_ms_index;
+      else
+         instr->src[src_number].src_type = nir_tex_src_lod;
       src_number++;
    }
 
@@ -1472,7 +1518,7 @@ ttn_emit_instruction(struct ttn_compile *c)
       return;
 
    nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
-   for (i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++) {
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
       src[i] = ttn_get_src(c, &tgsi_inst->Src[i]);
    }
    nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);
@@ -1708,9 +1754,11 @@ ttn_add_output_stores(struct ttn_compile *c)
       for (i = 0; i < array_len; i++) {
          nir_intrinsic_instr *store =
             nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+         unsigned loc = var->data.driver_location + i;
          store->num_components = 4;
-         store->const_index[0] = var->data.driver_location + i;
-         store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg;
+         store->const_index[0] = loc;
+         store->src[0].reg.reg = c->output_regs[loc].reg;
+         store->src[0].reg.base_offset = c->output_regs[loc].offset;
          nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
       }
    }
diff --git a/src/gallium/auxiliary/os/os_memory_aligned.h b/src/gallium/auxiliary/os/os_memory_aligned.h
index bb15f24ade3..f7d0e3652ed 100644
--- a/src/gallium/auxiliary/os/os_memory_aligned.h
+++ b/src/gallium/auxiliary/os/os_memory_aligned.h
@@ -55,7 +55,7 @@ add_overflow_size_t(size_t a, size_t b, size_t *res)
 /**
  * Return memory on given byte alignment
  */
-static INLINE void *
+static inline void *
 os_malloc_aligned(size_t size, size_t alignment)
 {
    char *ptr, *buf;
@@ -87,7 +87,7 @@ os_malloc_aligned(size_t size, size_t alignment)
 /**
  * Free memory returned by align_malloc().
  */
-static INLINE void
+static inline void
 os_free_aligned(void *ptr)
 {
    if (ptr) {
diff --git a/src/gallium/auxiliary/os/os_memory_stdc.h b/src/gallium/auxiliary/os/os_memory_stdc.h
index 806e5363568..c9fde06d8ac 100644
--- a/src/gallium/auxiliary/os/os_memory_stdc.h
+++ b/src/gallium/auxiliary/os/os_memory_stdc.h
@@ -50,7 +50,7 @@
 
 #if defined(HAVE_POSIX_MEMALIGN)
 
-static INLINE void *
+static inline void *
 os_malloc_aligned(size_t size, size_t alignment)
 {
    void *ptr;
diff --git a/src/gallium/auxiliary/os/os_mman.h b/src/gallium/auxiliary/os/os_mman.h
index e892610bdbd..2ae0027c1c2 100644
--- a/src/gallium/auxiliary/os/os_mman.h
+++ b/src/gallium/auxiliary/os/os_mman.h
@@ -58,7 +58,7 @@ extern "C" {
 
 extern void *__mmap2(void *, size_t, int, int, int, size_t);
 
-static INLINE void *os_mmap(void *addr, size_t length, int prot, int flags,
+static inline void *os_mmap(void *addr, size_t length, int prot, int flags,
                             int fd, loff_t offset)
 {
    /* offset must be aligned to 4096 (not necessarily the page size) */
@@ -78,7 +78,7 @@ static INLINE void *os_mmap(void *addr, size_t length, int prot, int flags,
 #  define os_mmap(addr, length, prot, flags, fd, offset) \
              mmap(addr, length, prot, flags, fd, offset)
 
-static INLINE int os_munmap(void *addr, size_t length)
+static inline int os_munmap(void *addr, size_t length)
 {
    /* Copied from configure code generated by AC_SYS_LARGEFILE */
 #define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + \
diff --git a/src/gallium/auxiliary/os/os_thread.h b/src/gallium/auxiliary/os/os_thread.h
index e9da8954885..be8adcc6cf2 100644
--- a/src/gallium/auxiliary/os/os_thread.h
+++ b/src/gallium/auxiliary/os/os_thread.h
@@ -54,7 +54,7 @@ typedef thrd_t pipe_thread;
 #define PIPE_THREAD_ROUTINE( name, param ) \
    int name( void *param )
 
-static INLINE pipe_thread pipe_thread_create( PIPE_THREAD_ROUTINE((*routine), ), void *param )
+static inline pipe_thread pipe_thread_create( PIPE_THREAD_ROUTINE((*routine), ), void *param )
 {
    pipe_thread thread;
 #ifdef HAVE_PTHREAD
@@ -75,17 +75,17 @@ static INLINE pipe_thread pipe_thread_create( PIPE_THREAD_ROUTINE((*routine), ),
    return thread;
 }
 
-static INLINE int pipe_thread_wait( pipe_thread thread )
+static inline int pipe_thread_wait( pipe_thread thread )
 {
    return thrd_join( thread, NULL );
 }
 
-static INLINE int pipe_thread_destroy( pipe_thread thread )
+static inline int pipe_thread_destroy( pipe_thread thread )
 {
    return thrd_detach( thread );
 }
 
-static INLINE void pipe_thread_setname( const char *name )
+static inline void pipe_thread_setname( const char *name )
 {
 #if defined(HAVE_PTHREAD)
 #  if defined(__GNU_LIBRARY__) && defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
@@ -145,17 +145,17 @@ typedef cnd_t pipe_condvar;
 
 typedef pthread_barrier_t pipe_barrier;
 
-static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
+static inline void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
 {
    pthread_barrier_init(barrier, NULL, count);
 }
 
-static INLINE void pipe_barrier_destroy(pipe_barrier *barrier)
+static inline void pipe_barrier_destroy(pipe_barrier *barrier)
 {
    pthread_barrier_destroy(barrier);
 }
 
-static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
+static inline void pipe_barrier_wait(pipe_barrier *barrier)
 {
    pthread_barrier_wait(barrier);
 }
@@ -171,7 +171,7 @@ typedef struct {
    pipe_condvar condvar;
 } pipe_barrier;
 
-static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
+static inline void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
 {
    barrier->count = count;
    barrier->waiters = 0;
@@ -180,14 +180,14 @@ static INLINE void pipe_barrier_init(pipe_barrier *barrier, unsigned count)
    pipe_condvar_init(barrier->condvar);
 }
 
-static INLINE void pipe_barrier_destroy(pipe_barrier *barrier)
+static inline void pipe_barrier_destroy(pipe_barrier *barrier)
 {
    assert(barrier->waiters == 0);
    pipe_mutex_destroy(barrier->mutex);
    pipe_condvar_destroy(barrier->condvar);
 }
 
-static INLINE void pipe_barrier_wait(pipe_barrier *barrier)
+static inline void pipe_barrier_wait(pipe_barrier *barrier)
 {
    pipe_mutex_lock(barrier->mutex);
 
@@ -225,7 +225,7 @@ typedef struct
 } pipe_semaphore;
 
 
-static INLINE void
+static inline void
 pipe_semaphore_init(pipe_semaphore *sema, int init_val)
 {
    pipe_mutex_init(sema->mutex);
@@ -233,7 +233,7 @@ pipe_semaphore_init(pipe_semaphore *sema, int init_val)
    sema->counter = init_val;
 }
 
-static INLINE void
+static inline void
 pipe_semaphore_destroy(pipe_semaphore *sema)
 {
    pipe_mutex_destroy(sema->mutex);
@@ -241,7 +241,7 @@ pipe_semaphore_destroy(pipe_semaphore *sema)
 }
 
 /** Signal/increment semaphore counter */
-static INLINE void
+static inline void
 pipe_semaphore_signal(pipe_semaphore *sema)
 {
    pipe_mutex_lock(sema->mutex);
@@ -251,7 +251,7 @@ pipe_semaphore_signal(pipe_semaphore *sema)
 }
 
 /** Wait for semaphore counter to be greater than zero */
-static INLINE void
+static inline void
 pipe_semaphore_wait(pipe_semaphore *sema)
 {
    pipe_mutex_lock(sema->mutex);
@@ -277,7 +277,7 @@ typedef struct {
 #define PIPE_TSD_INIT_MAGIC 0xff8adc98
 
 
-static INLINE void
+static inline void
 pipe_tsd_init(pipe_tsd *tsd)
 {
    if (tss_create(&tsd->key, NULL/*free*/) != 0) {
@@ -286,7 +286,7 @@ pipe_tsd_init(pipe_tsd *tsd)
    tsd->initMagic = PIPE_TSD_INIT_MAGIC;
 }
 
-static INLINE void *
+static inline void *
 pipe_tsd_get(pipe_tsd *tsd)
 {
    if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) {
@@ -295,7 +295,7 @@ pipe_tsd_get(pipe_tsd *tsd)
    return tss_get(tsd->key);
 }
 
-static INLINE void
+static inline void
 pipe_tsd_set(pipe_tsd *tsd, void *value)
 {
    if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) {
diff --git a/src/gallium/auxiliary/os/os_time.c b/src/gallium/auxiliary/os/os_time.c
index f7e4ca49c7c..3d2e4167222 100644
--- a/src/gallium/auxiliary/os/os_time.c
+++ b/src/gallium/auxiliary/os/os_time.c
@@ -33,11 +33,13 @@
  */
 
 
-#include "pipe/p_config.h"
+#include "pipe/p_defines.h"
+#include "util/u_atomic.h"
 
 #if defined(PIPE_OS_UNIX)
 #  include <time.h> /* timeval */
 #  include <sys/time.h> /* timeval */
+#  include <sched.h> /* sched_yield */
 #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
 #  include <windows.h>
 #else
@@ -92,3 +94,78 @@ os_time_sleep(int64_t usecs)
 }
 
 #endif
+
+
+/** Return now + timeout (ns); PIPE_TIMEOUT_INFINITE if it cannot fit. */
+int64_t
+os_time_get_absolute_timeout(uint64_t timeout)
+{
+   int64_t time;
+
+   /* Also check for the type upper bound. */
+   if (timeout == PIPE_TIMEOUT_INFINITE || timeout > INT64_MAX)
+      return PIPE_TIMEOUT_INFINITE;
+
+   time = os_time_get_nano();
+   /* Signed overflow is undefined behavior, so compare against the
+    * remaining headroom instead of adding first and testing the sum. */
+   if (time > 0 && (int64_t)timeout > INT64_MAX - time)
+      return PIPE_TIMEOUT_INFINITE;
+
+   return time + (int64_t)timeout;
+}
+
+
+/* Busy-wait for *var == 0; false once timeout ns elapse (0: one check). */
+bool
+os_wait_until_zero(volatile int *var, uint64_t timeout)
+{
+   int64_t begin, deadline;
+
+   if (p_atomic_read(var) == 0)
+      return true;
+
+   if (timeout == 0)
+      return false;
+
+   if (timeout == PIPE_TIMEOUT_INFINITE) {
+      while (p_atomic_read(var) != 0) {
+#if defined(PIPE_OS_UNIX)
+         sched_yield();
+#endif
+      }
+      return true;
+   }
+
+   begin = os_time_get_nano();
+   deadline = begin + timeout;
+   while (p_atomic_read(var) != 0) {
+      if (os_time_timeout(begin, deadline, os_time_get_nano()))
+         return false;
+#if defined(PIPE_OS_UNIX)
+      sched_yield();
+#endif
+   }
+   return true;
+}
+
+
+/* Busy-wait for *var == 0, giving up at absolute time "timeout" (ns). */
+bool
+os_wait_until_zero_abs_timeout(volatile int *var, int64_t timeout)
+{
+   if (p_atomic_read(var) == 0)
+      return true;
+
+   if (timeout == PIPE_TIMEOUT_INFINITE)
+      return os_wait_until_zero(var, PIPE_TIMEOUT_INFINITE);
+
+   while (p_atomic_read(var) != 0) {
+      if (timeout <= os_time_get_nano())
+         return false;
+#if defined(PIPE_OS_UNIX)
+      sched_yield();
+#endif
+   }
+   return true;
+}
diff --git a/src/gallium/auxiliary/os/os_time.h b/src/gallium/auxiliary/os/os_time.h
index 4fab03cc671..9312e028809 100644
--- a/src/gallium/auxiliary/os/os_time.h
+++ b/src/gallium/auxiliary/os/os_time.h
@@ -45,7 +45,7 @@
 #include "pipe/p_compiler.h"
 
 
-#ifdef	__cplusplus
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -60,9 +60,10 @@ os_time_get_nano(void);
 /*
  * Get the current time in microseconds from an unknown base.
  */
-static INLINE int64_t
-os_time_get(void) {
-    return os_time_get_nano() / 1000;
+static inline int64_t
+os_time_get(void)
+{
+   return os_time_get_nano() / 1000;
 }
 
 
@@ -82,19 +83,56 @@ os_time_sleep(int64_t usecs);
  *
  * Returns true if the current time has elapsed beyond the specified interval.
  */
-static INLINE boolean
+static inline boolean
 os_time_timeout(int64_t start,
                 int64_t end,
                 int64_t curr)
 {
-   if(start <= end)
+   if (start <= end)
       return !(start <= curr && curr < end);
    else
       return !((start <= curr) || (curr < end));
 }
 
 
-#ifdef	__cplusplus
+/**
+ * Convert a relative timeout in nanoseconds into an absolute timeout,
+ * in other words, it returns current time + timeout.
+ * os_time_get_nano() must be monotonic.
+ * PIPE_TIMEOUT_INFINITE is passed through unchanged. If the calculation
+ * overflows, PIPE_TIMEOUT_INFINITE is returned.
+ */
+int64_t
+os_time_get_absolute_timeout(uint64_t timeout);
+
+
+/**
+ * Wait until the variable at the given memory location is zero.
+ *
+ * \param var           variable
+ * \param timeout       timeout in ns, can be anything from 0 (no wait) to
+ *                      PIPE_TIMEOUT_INFINITE (wait forever)
+ * \return     true if the variable is zero
+ */
+bool
+os_wait_until_zero(volatile int *var, uint64_t timeout);
+
+
+/**
+ * Wait until the variable at the given memory location is zero.
+ * The timeout is the absolute time when the waiting should stop. If it is
+ * less than or equal to the current time, it only returns the status and
+ * doesn't wait. PIPE_TIMEOUT_INFINITE waits forever. This requires that
+ * os_time_get_nano is monotonic.
+ *
+ * \param var       variable
+ * \param timeout   the time in ns when the waiting should stop
+ * \return     true if the variable is zero
+ */
+bool
+os_wait_until_zero_abs_timeout(volatile int *var, int64_t timeout);
+
+#ifdef __cplusplus
 }
 #endif
 
diff --git a/src/gallium/auxiliary/pipe-loader/Makefile.am b/src/gallium/auxiliary/pipe-loader/Makefile.am
index cb6035d85c9..8c837996539 100644
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -1,37 +1,28 @@
 include Makefile.sources
+include $(top_srcdir)/src/gallium/Automake.inc
 
-AM_CPPFLAGS = $(DEFINES) \
-	$(VISIBILITY_CFLAGS) \
-	-I$(top_srcdir)/include \
-	-I$(top_srcdir)/src \
+# XXX: check if we need the gallium/winsys include
+AM_CFLAGS = \
 	-I$(top_srcdir)/src/loader \
-	-I$(top_srcdir)/src/gallium/include \
-	-I$(top_srcdir)/src/gallium/auxiliary \
-	-I$(top_srcdir)/src/gallium/winsys
+	-I$(top_srcdir)/src/gallium/winsys \
+	$(GALLIUM_PIPE_LOADER_DEFINES) \
+	$(GALLIUM_CFLAGS) \
+	$(VISIBILITY_CFLAGS)
 
 noinst_LTLIBRARIES = libpipe_loader.la
-noinst_LTLIBRARIES += libpipe_loader_client.la
+
+libpipe_loader_la_SOURCES = \
+	$(COMMON_SOURCES)
 
 if HAVE_DRM_LOADER_GALLIUM
-AM_CFLAGS = $(LIBDRM_CFLAGS)
+AM_CFLAGS += \
+	$(LIBDRM_CFLAGS)
 
-COMMON_SOURCES += $(DRM_SOURCES)
+libpipe_loader_la_SOURCES += \
+	$(DRM_SOURCES)
 
-COMMON_LIBADD = \
+libpipe_loader_la_LIBADD = \
 	$(top_builddir)/src/loader/libloader.la
 
 endif
 
-libpipe_loader_la_CFLAGS  = \
-	$(GALLIUM_PIPE_LOADER_DEFINES) \
-	$(AM_CFLAGS) $(AM_CPPFLAGS)
-libpipe_loader_la_SOURCES = $(COMMON_SOURCES)
-libpipe_loader_la_LIBADD  = $(COMMON_LIBADD) \
-	$(GALLIUM_PIPE_LOADER_LIBS)
-
-libpipe_loader_client_la_CFLAGS  = \
-	$(GALLIUM_PIPE_LOADER_CLIENT_DEFINES) \
-	$(AM_CFLAGS) $(AM_CPPFLAGS)
-libpipe_loader_client_la_SOURCES = $(COMMON_SOURCES)
-libpipe_loader_client_la_LIBADD  = $(COMMON_LIBADD) \
-	$(GALLIUM_PIPE_LOADER_CLIENT_LIBS)
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
index 9f43f17a6e2..9b8712666bb 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
@@ -36,10 +36,6 @@
 #include "pipe/p_compiler.h"
 #include "state_tracker/drm_driver.h"
 
-#ifdef HAVE_PIPE_LOADER_XLIB
-#include <X11/Xlib.h>
-#endif
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -116,21 +112,6 @@ pipe_loader_configuration(struct pipe_loader_device *dev,
 void
 pipe_loader_release(struct pipe_loader_device **devs, int ndev);
 
-#ifdef HAVE_PIPE_LOADER_XLIB
-
-/**
- * Initialize Xlib for an associated display.
- *
- * This function is platform-specific.
- *
- * \sa pipe_loader_probe
- */
-bool
-pipe_loader_sw_probe_xlib(struct pipe_loader_device **devs, Display *display);
-
-#endif
-
-
 #ifdef HAVE_PIPE_LOADER_DRI
 
 /**
@@ -195,13 +176,9 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev);
  * This function is platform-specific.
  *
  * \sa pipe_loader_probe
- *
- * \param auth_x If true, the pipe-loader will attempt to
- *               authenticate with the X server.
  */
 bool
-pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd,
-                         boolean auth_x);
+pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd);
 
 #endif
 
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index ffeb29906b5..1799df7e4c5 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -35,12 +35,6 @@
 #include <xf86drm.h>
 #include <unistd.h>
 
-#ifdef HAVE_PIPE_LOADER_XCB
-
-#include <xcb/dri2.h>
-
-#endif
-
 #include "loader.h"
 #include "state_tracker/drm_driver.h"
 #include "pipe_loader_priv.h"
@@ -64,78 +58,8 @@ struct pipe_loader_drm_device {
 
 static struct pipe_loader_ops pipe_loader_drm_ops;
 
-#ifdef HAVE_PIPE_LOADER_XCB
-
-static xcb_screen_t *
-get_xcb_screen(xcb_screen_iterator_t iter, int screen)
-{
-    for (; iter.rem; --screen, xcb_screen_next(&iter))
-        if (screen == 0)
-            return iter.data;
-
-    return NULL;
-}
-
-#endif
-
-static void
-pipe_loader_drm_x_auth(int fd)
-{
-#ifdef HAVE_PIPE_LOADER_XCB
-   /* Try authenticate with the X server to give us access to devices that X
-    * is running on. */
-   xcb_connection_t *xcb_conn;
-   const xcb_setup_t *xcb_setup;
-   xcb_screen_iterator_t s;
-   xcb_dri2_connect_cookie_t connect_cookie;
-   xcb_dri2_connect_reply_t *connect;
-   drm_magic_t magic;
-   xcb_dri2_authenticate_cookie_t authenticate_cookie;
-   xcb_dri2_authenticate_reply_t *authenticate;
-   int screen;
-
-   xcb_conn = xcb_connect(NULL, &screen);
-
-   if(!xcb_conn)
-      return;
-
-   xcb_setup = xcb_get_setup(xcb_conn);
-
-  if (!xcb_setup)
-    goto disconnect;
-
-   s = xcb_setup_roots_iterator(xcb_setup);
-   connect_cookie = xcb_dri2_connect_unchecked(xcb_conn,
-                                               get_xcb_screen(s, screen)->root,
-                                               XCB_DRI2_DRIVER_TYPE_DRI);
-   connect = xcb_dri2_connect_reply(xcb_conn, connect_cookie, NULL);
-
-   if (!connect || connect->driver_name_length
-                   + connect->device_name_length == 0) {
-
-      goto disconnect;
-   }
-
-   if (drmGetMagic(fd, &magic))
-      goto disconnect;
-
-   authenticate_cookie = xcb_dri2_authenticate_unchecked(xcb_conn,
-                                                         s.data->root,
-                                                         magic);
-   authenticate = xcb_dri2_authenticate_reply(xcb_conn,
-                                              authenticate_cookie,
-                                              NULL);
-   FREE(authenticate);
-
-disconnect:
-   xcb_disconnect(xcb_conn);
-
-#endif
-}
-
 bool
-pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd,
-                         boolean auth_x)
+pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
 {
    struct pipe_loader_drm_device *ddev = CALLOC_STRUCT(pipe_loader_drm_device);
    int vendor_id, chip_id;
@@ -153,9 +77,6 @@ pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd,
    ddev->base.ops = &pipe_loader_drm_ops;
    ddev->fd = fd;
 
-   if (auth_x)
-      pipe_loader_drm_x_auth(fd);
-
    ddev->base.driver_name = loader_get_driver_for_fd(fd, _LOADER_GALLIUM);
    if (!ddev->base.driver_name)
       goto fail;
@@ -169,34 +90,19 @@ pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd,
 }
 
 static int
-open_drm_minor(int minor)
-{
-   char path[PATH_MAX];
-   snprintf(path, sizeof(path), DRM_DEV_NAME, DRM_DIR_NAME, minor);
-   return open(path, O_RDWR, 0);
-}
-
-static int
 open_drm_render_node_minor(int minor)
 {
    char path[PATH_MAX];
    snprintf(path, sizeof(path), DRM_RENDER_NODE_DEV_NAME_FORMAT, DRM_DIR_NAME,
             minor);
-   return open(path, O_RDWR, 0);
+   return loader_open_device(path);
 }
 
 int
 pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)
 {
-   int i, k, fd, num_render_node_devs;
-   int j = 0;
+   int i, j, fd;
 
-   struct {
-      unsigned vendor_id;
-      unsigned chip_id;
-   } render_node_devs[DRM_RENDER_NODE_MAX_NODES];
-
-   /* Look for render nodes first */
    for (i = DRM_RENDER_NODE_MIN_MINOR, j = 0;
         i <= DRM_RENDER_NODE_MAX_MINOR; i++) {
       fd = open_drm_render_node_minor(i);
@@ -204,14 +110,11 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)
       if (fd < 0)
          continue;
 
-      if (!pipe_loader_drm_probe_fd(&dev, fd, false)) {
+      if (!pipe_loader_drm_probe_fd(&dev, fd)) {
          close(fd);
          continue;
       }
 
-      render_node_devs[j].vendor_id = dev->u.pci.vendor_id;
-      render_node_devs[j].chip_id = dev->u.pci.chip_id;
-
       if (j < ndev) {
          devs[j] = dev;
       } else {
@@ -221,46 +124,6 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)
       j++;
    }
 
-   num_render_node_devs = j;
-
-   /* Next look for drm devices. */
-   for (i = 0; i < DRM_MAX_MINOR; i++) {
-      struct pipe_loader_device *dev;
-      boolean duplicate = FALSE;
-      fd = open_drm_minor(i);
-      if (fd < 0)
-         continue;
-
-      if (!pipe_loader_drm_probe_fd(&dev, fd, true)) {
-         close(fd);
-         continue;
-      }
-
-      /* Check to make sure we aren't already accessing this device via
-       * render nodes.
-       */
-      for (k = 0; k < num_render_node_devs; k++) {
-         if (dev->u.pci.vendor_id == render_node_devs[k].vendor_id &&
-             dev->u.pci.chip_id == render_node_devs[k].chip_id) {
-            close(fd);
-            dev->ops->release(&dev);
-            duplicate = TRUE;
-            break;
-         }
-      }
-
-      if (duplicate)
-         continue;
-
-      if (j < ndev) {
-         devs[j] = dev;
-      } else {
-         dev->ops->release(&dev);
-      }
-
-      j++;
-   }
-
    return j;
 }
 
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index 3d332645231..6794930193d 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -32,10 +32,6 @@
 #include "sw/dri/dri_sw_winsys.h"
 #include "sw/null/null_sw_winsys.h"
 #include "sw/wrapper/wrapper_sw_winsys.h"
-#ifdef HAVE_PIPE_LOADER_XLIB
-/* Explicitly wrap the header to ease build without X11 headers */
-#include "sw/xlib/xlib_sw_winsys.h"
-#endif
 #include "target-helpers/inline_sw_helper.h"
 #include "state_tracker/drisw_api.h"
 
@@ -53,29 +49,6 @@ static struct sw_winsys *(*backends[])() = {
    null_sw_create
 };
 
-#ifdef HAVE_PIPE_LOADER_XLIB
-bool
-pipe_loader_sw_probe_xlib(struct pipe_loader_device **devs, Display *display)
-{
-   struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
-
-   if (!sdev)
-      return false;
-
-   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-   sdev->base.driver_name = "swrast";
-   sdev->base.ops = &pipe_loader_sw_ops;
-   sdev->ws = xlib_create_sw_winsys(display);
-   if (!sdev->ws) {
-      FREE(sdev);
-      return false;
-   }
-   *devs = &sdev->base;
-
-   return true;
-}
-#endif
-
 #ifdef HAVE_PIPE_LOADER_DRI
 bool
 pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 03bdce31513..ba48d461d5c 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -158,7 +158,7 @@ struct pb_vtbl
 
 /* Accessor functions for pb->vtbl:
  */
-static INLINE void *
+static inline void *
 pb_map(struct pb_buffer *buf, 
        unsigned flags, void *flush_ctx)
 {
@@ -170,7 +170,7 @@ pb_map(struct pb_buffer *buf,
 }
 
 
-static INLINE void 
+static inline void 
 pb_unmap(struct pb_buffer *buf)
 {
    assert(buf);
@@ -181,7 +181,7 @@ pb_unmap(struct pb_buffer *buf)
 }
 
 
-static INLINE void
+static inline void
 pb_get_base_buffer( struct pb_buffer *buf,
 		    struct pb_buffer **base_buf,
 		    pb_size *offset )
@@ -200,7 +200,7 @@ pb_get_base_buffer( struct pb_buffer *buf,
 }
 
 
-static INLINE enum pipe_error 
+static inline enum pipe_error 
 pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags)
 {
    assert(buf);
@@ -211,7 +211,7 @@ pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags)
 }
 
 
-static INLINE void 
+static inline void 
 pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence)
 {
    assert(buf);
@@ -222,7 +222,7 @@ pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence)
 }
 
 
-static INLINE void 
+static inline void 
 pb_destroy(struct pb_buffer *buf)
 {
    assert(buf);
@@ -232,7 +232,7 @@ pb_destroy(struct pb_buffer *buf)
    buf->vtbl->destroy(buf);
 }
 
-static INLINE void
+static inline void
 pb_reference(struct pb_buffer **dst,
              struct pb_buffer *src)
 {
@@ -248,7 +248,7 @@ pb_reference(struct pb_buffer **dst,
  * Utility function to check whether the provided alignment is consistent with
  * the requested or not.
  */
-static INLINE boolean
+static inline boolean
 pb_check_alignment(pb_size requested, pb_size provided)
 {
    if(!requested)
@@ -265,7 +265,7 @@ pb_check_alignment(pb_size requested, pb_size provided)
  * Utility function to check whether the provided alignment is consistent with
  * the requested or not.
  */
-static INLINE boolean
+static inline boolean
 pb_check_usage(unsigned requested, unsigned provided)
 {
    return (requested & provided) == requested ? TRUE : FALSE;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index fc81e11b972..08935b4dec7 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -149,7 +149,7 @@ struct fenced_buffer
 };
 
 
-static INLINE struct fenced_manager *
+static inline struct fenced_manager *
 fenced_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -157,7 +157,7 @@ fenced_manager(struct pb_manager *mgr)
 }
 
 
-static INLINE struct fenced_buffer *
+static inline struct fenced_buffer *
 fenced_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -240,7 +240,7 @@ fenced_manager_dump_locked(struct fenced_manager *fenced_mgr)
 }
 
 
-static INLINE void
+static inline void
 fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr,
                              struct fenced_buffer *fenced_buf)
 {
@@ -265,7 +265,7 @@ fenced_buffer_destroy_locked(struct fenced_manager *fenced_mgr,
  *
  * Reference count should be incremented before calling this function.
  */
-static INLINE void
+static inline void
 fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
                          struct fenced_buffer *fenced_buf)
 {
@@ -289,7 +289,7 @@ fenced_buffer_add_locked(struct fenced_manager *fenced_mgr,
  *
  * Returns TRUE if the buffer was detroyed.
  */
-static INLINE boolean
+static inline boolean
 fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
                             struct fenced_buffer *fenced_buf)
 {
@@ -326,7 +326,7 @@ fenced_buffer_remove_locked(struct fenced_manager *fenced_mgr,
  * This function will release and re-acquire the mutex, so any copy of mutable
  * state must be discarded after calling it.
  */
-static INLINE enum pipe_error
+static inline enum pipe_error
 fenced_buffer_finish_locked(struct fenced_manager *fenced_mgr,
                             struct fenced_buffer *fenced_buf)
 {
@@ -550,7 +550,7 @@ fenced_buffer_destroy_gpu_storage_locked(struct fenced_buffer *fenced_buf)
  * This function is a shorthand around pb_manager::create_buffer for
  * fenced_buffer_create_gpu_storage_locked()'s benefit.
  */
-static INLINE boolean
+static inline boolean
 fenced_buffer_try_create_gpu_storage_locked(struct fenced_manager *fenced_mgr,
                                             struct fenced_buffer *fenced_buf)
 {
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index bf1a538bf79..b97771457d6 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -49,7 +49,7 @@ struct malloc_buffer
 
 extern const struct pb_vtbl malloc_buffer_vtbl;
 
-static INLINE struct malloc_buffer *
+static inline struct malloc_buffer *
 malloc_buffer(struct pb_buffer *buf)
 {
    assert(buf);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
index 62df2a6b9de..47cbaeb20ac 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
@@ -50,7 +50,7 @@ struct pb_alt_manager
 };
 
 
-static INLINE struct pb_alt_manager *
+static inline struct pb_alt_manager *
 pb_alt_manager(struct pb_manager *mgr)
 {
    assert(mgr);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 5023687ec04..3b35049f679 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -88,7 +88,7 @@ struct pb_cache_manager
 };
 
 
-static INLINE struct pb_cache_buffer *
+static inline struct pb_cache_buffer *
 pb_cache_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -96,7 +96,7 @@ pb_cache_buffer(struct pb_buffer *buf)
 }
 
 
-static INLINE struct pb_cache_manager *
+static inline struct pb_cache_manager *
 pb_cache_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -107,7 +107,7 @@ pb_cache_manager(struct pb_manager *mgr)
 /**
  * Actually destroy the buffer.
  */
-static INLINE void
+static inline void
 _pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
 {
    struct pb_cache_manager *mgr = buf->mgr;
@@ -235,7 +235,7 @@ pb_cache_buffer_vtbl = {
 };
 
 
-static INLINE int
+static inline int
 pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,  
                           pb_size size,
                           const struct pb_desc *desc)
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index 6236afb70d1..7ad70f293a6 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -99,7 +99,7 @@ struct pb_debug_manager
 };
 
 
-static INLINE struct pb_debug_buffer *
+static inline struct pb_debug_buffer *
 pb_debug_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -107,7 +107,7 @@ pb_debug_buffer(struct pb_buffer *buf)
 }
 
 
-static INLINE struct pb_debug_manager *
+static inline struct pb_debug_manager *
 pb_debug_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -123,7 +123,7 @@ static const uint8_t random_pattern[32] = {
 };
 
 
-static INLINE void 
+static inline void 
 fill_random_pattern(uint8_t *dst, pb_size size)
 {
    pb_size i = 0;
@@ -134,7 +134,7 @@ fill_random_pattern(uint8_t *dst, pb_size size)
 }
 
 
-static INLINE boolean 
+static inline boolean 
 check_random_pattern(const uint8_t *dst, pb_size size, 
                      pb_size *min_ofs, pb_size *max_ofs) 
 {
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index 84eb6edda34..72099ba5850 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -65,7 +65,7 @@ struct mm_pb_manager
 };
 
 
-static INLINE struct mm_pb_manager *
+static inline struct mm_pb_manager *
 mm_pb_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -83,7 +83,7 @@ struct mm_buffer
 };
 
 
-static INLINE struct mm_buffer *
+static inline struct mm_buffer *
 mm_buffer(struct pb_buffer *buf)
 {
    assert(buf);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
index 77e642ada08..c20e2dca02d 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
@@ -70,7 +70,7 @@ struct pb_ondemand_manager
 
 extern const struct pb_vtbl pb_ondemand_buffer_vtbl;
 
-static INLINE struct pb_ondemand_buffer *
+static inline struct pb_ondemand_buffer *
 pb_ondemand_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -80,7 +80,7 @@ pb_ondemand_buffer(struct pb_buffer *buf)
    return (struct pb_ondemand_buffer *)buf;
 }
 
-static INLINE struct pb_ondemand_manager *
+static inline struct pb_ondemand_manager *
 pb_ondemand_manager(struct pb_manager *mgr)
 {
    assert(mgr);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
index 51525b0f97c..56a5e82ece0 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
@@ -73,7 +73,7 @@ struct pool_pb_manager
 };
 
 
-static INLINE struct pool_pb_manager *
+static inline struct pool_pb_manager *
 pool_pb_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -93,7 +93,7 @@ struct pool_buffer
 };
 
 
-static INLINE struct pool_buffer *
+static inline struct pool_buffer *
 pool_buffer(struct pb_buffer *buf)
 {
    assert(buf);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 6a62b4f5fdb..aadeaa087f4 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -163,7 +163,7 @@ struct pb_slab_range_manager
 };
 
 
-static INLINE struct pb_slab_buffer *
+static inline struct pb_slab_buffer *
 pb_slab_buffer(struct pb_buffer *buf)
 {
    assert(buf);
@@ -171,7 +171,7 @@ pb_slab_buffer(struct pb_buffer *buf)
 }
 
 
-static INLINE struct pb_slab_manager *
+static inline struct pb_slab_manager *
 pb_slab_manager(struct pb_manager *mgr)
 {
    assert(mgr);
@@ -179,7 +179,7 @@ pb_slab_manager(struct pb_manager *mgr)
 }
 
 
-static INLINE struct pb_slab_range_manager *
+static inline struct pb_slab_range_manager *
 pb_slab_range_manager(struct pb_manager *mgr)
 {
    assert(mgr);
diff --git a/src/gallium/auxiliary/postprocess/pp_colors.c b/src/gallium/auxiliary/postprocess/pp_colors.c
index 247e4df72a4..e6ea0102eac 100644
--- a/src/gallium/auxiliary/postprocess/pp_colors.c
+++ b/src/gallium/auxiliary/postprocess/pp_colors.c
@@ -37,6 +37,7 @@ pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in,
 {
 
    struct pp_program *p = ppq->p;
+   const struct pipe_sampler_state *samplers[] = {&p->sampler_point};
 
    pp_filter_setup_in(p, in);
    pp_filter_setup_out(p, out);
@@ -44,8 +45,7 @@ pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in,
    pp_filter_set_fb(p);
    pp_filter_misc_state(p);
 
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
-   cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+   cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
    cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->view);
 
    cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][0]);
diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.c b/src/gallium/auxiliary/postprocess/pp_mlaa.c
index 147d14de95d..024a24895c8 100644
--- a/src/gallium/auxiliary/postprocess/pp_mlaa.c
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.c
@@ -141,8 +141,10 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
    p->pipe->clear(p->pipe, PIPE_CLEAR_STENCIL | PIPE_CLEAR_COLOR0,
                   &p->clear_color, 0, 0);
 
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
-   cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+   {
+      const struct pipe_sampler_state *samplers[] = {&p->sampler_point};
+      cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
+   }
    cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->view);
 
    cso_set_vertex_shader_handle(p->cso, ppq->shaders[n][1]);    /* offsetvs */
@@ -168,10 +170,11 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
 
    pp_filter_set_clear_fb(p);
 
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->sampler_point);
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 2, &p->sampler);
-   cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+   {
+      const struct pipe_sampler_state *samplers[] =
+         {&p->sampler_point, &p->sampler_point, &p->sampler};
+      cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 3, samplers);
+   }
 
    arr[0] = p->view;
    cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 3, arr);
@@ -199,9 +202,11 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct pipe_resource *in,
    u_sampler_view_default_template(&v_tmp, in, in->format);
    arr[0] = p->pipe->create_sampler_view(p->pipe, in, &v_tmp);
 
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 0, &p->sampler_point);
-   cso_single_sampler(p->cso, PIPE_SHADER_FRAGMENT, 1, &p->sampler_point);
-   cso_single_sampler_done(p->cso, PIPE_SHADER_FRAGMENT);
+   {
+      const struct pipe_sampler_state *samplers[] =
+         {&p->sampler_point, &p->sampler_point};
+      cso_set_samplers(p->cso, PIPE_SHADER_FRAGMENT, 2, samplers);
+   }
 
    arr[1] = p->view;
    cso_set_sampler_views(p->cso, PIPE_SHADER_FRAGMENT, 2, arr);
diff --git a/src/gallium/auxiliary/postprocess/pp_run.c b/src/gallium/auxiliary/postprocess/pp_run.c
index e76ce854442..caa2062f4cf 100644
--- a/src/gallium/auxiliary/postprocess/pp_run.c
+++ b/src/gallium/auxiliary/postprocess/pp_run.c
@@ -125,8 +125,8 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
    cso_save_rasterizer(cso);
    cso_save_sample_mask(cso);
    cso_save_min_samples(cso);
-   cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
-   cso_save_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+   cso_save_fragment_samplers(cso);
+   cso_save_fragment_sampler_views(cso);
    cso_save_stencil_ref(cso);
    cso_save_stream_outputs(cso);
    cso_save_vertex_elements(cso);
@@ -196,8 +196,8 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
    cso_restore_rasterizer(cso);
    cso_restore_sample_mask(cso);
    cso_restore_min_samples(cso);
-   cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
-   cso_restore_sampler_views(cso, PIPE_SHADER_FRAGMENT);
+   cso_restore_fragment_samplers(cso);
+   cso_restore_fragment_sampler_views(cso);
    cso_restore_stencil_ref(cso);
    cso_restore_stream_outputs(cso);
    cso_restore_vertex_elements(cso);
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index f9637889187..27ee8f1242a 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -510,7 +510,7 @@ void x86_mov8_imm( struct x86_function *p, struct x86_reg dst, uint8_t imm )
 /**
  * Immediate group 1 instructions.
  */
-static INLINE void 
+static inline void 
 x86_group1_imm( struct x86_function *p, 
                 unsigned op, struct x86_reg dst, int imm )
 {
@@ -2196,7 +2196,7 @@ void x86_release_func( struct x86_function *p )
 }
 
 
-static INLINE x86_func
+static inline x86_func
 voidptr_to_x86_func(void *v)
 {
    union {
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 498ca824cd1..b44d917cd43 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -136,7 +136,7 @@ enum x86_target
 };
 
 /* make this read a member of x86_function if target != host is desired */
-static INLINE enum x86_target x86_target( struct x86_function* p )
+static inline enum x86_target x86_target( struct x86_function* p )
 {
 #ifdef PIPE_ARCH_X86
    return X86_32;
@@ -147,7 +147,7 @@ static INLINE enum x86_target x86_target( struct x86_function* p )
 #endif
 }
 
-static INLINE unsigned x86_target_caps( struct x86_function* p )
+static inline unsigned x86_target_caps( struct x86_function* p )
 {
    return p->caps;
 }
diff --git a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
index 0648e596549..d353ab81e34 100644
--- a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
@@ -27,7 +27,7 @@
  * TODO: Audit the following *screen_create() - all of
  * them should return the original screen on failuire.
  */
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
 debug_screen_wrap(struct pipe_screen *screen)
 {
 #if defined(GALLIUM_RBUG)
diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
index d3c331d224d..08271a760f5 100644
--- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
@@ -42,6 +42,7 @@
 #if GALLIUM_RADEONSI
 #include "radeon/radeon_winsys.h"
 #include "radeon/drm/radeon_drm_public.h"
+#include "amdgpu/drm/amdgpu_public.h"
 #include "radeonsi/si_public.h"
 #endif
 
@@ -228,7 +229,12 @@ pipe_radeonsi_create_screen(int fd)
 {
    struct radeon_winsys *rw;
 
-   rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+   /* First, try amdgpu. */
+   rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
+
+   if (!rw)
+      rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+
    return rw ? debug_screen_wrap(rw->screen) : NULL;
 }
 #endif
diff --git a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
index d8cee2b2917..5f46552f6c3 100644
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -20,7 +20,7 @@
 #endif
 
 
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
 sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
 {
    struct pipe_screen *screen = NULL;
@@ -39,7 +39,7 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
 }
 
 
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
 sw_screen_create(struct sw_winsys *winsys)
 {
    const char *default_driver;
@@ -71,7 +71,7 @@ PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void)
    return galliumsw_driver_extensions;
 }
 
-INLINE struct pipe_screen *
+inline struct pipe_screen *
 drisw_create_screen(struct drisw_loader_funcs *lf)
 {
    struct sw_winsys *winsys = NULL;
@@ -98,7 +98,7 @@ drisw_create_screen(struct drisw_loader_funcs *lf)
 
 extern struct pipe_screen *ninesw_create_screen(struct pipe_screen *screen);
 
-INLINE struct pipe_screen *
+inline struct pipe_screen *
 ninesw_create_screen(struct pipe_screen *pscreen)
 {
    struct sw_winsys *winsys = NULL;
diff --git a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
index 0a2e215352b..4f38ba9f919 100644
--- a/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_wrapper_sw_helper.h
@@ -9,7 +9,7 @@
  * Try to wrap a hw screen with a software screen.
  * On failure will return given screen.
  */
-static INLINE struct pipe_screen *
+static inline struct pipe_screen *
 sw_screen_wrap(struct pipe_screen *screen)
 {
 #if defined(GALLIUM_SOFTPIPE) || defined(GALLIUM_LLVMPIPE)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index c80d7a20481..8ceb5b47584 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -48,6 +48,7 @@ struct dump_ctx
    int indent;
    
    uint indentation;
+   FILE *file;
 
    void (*dump_printf)(struct dump_ctx *ctx, const char *format, ...);
 };
@@ -58,7 +59,10 @@ dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
    va_list ap;
    (void)ctx;
    va_start(ap, format);
-   _debug_vprintf(format, ap);
+   if (ctx->file)
+      vfprintf(ctx->file, format, ap);
+   else
+      _debug_vprintf(format, ap);
    va_end(ap);
 }
 
@@ -659,9 +663,7 @@ prolog(
 }
 
 void
-tgsi_dump(
-   const struct tgsi_token *tokens,
-   uint flags )
+tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file)
 {
    struct dump_ctx ctx;
 
@@ -677,10 +679,17 @@ tgsi_dump(
    ctx.indent = 0;
    ctx.dump_printf = dump_ctx_printf;
    ctx.indentation = 0;
+   ctx.file = file;
 
    tgsi_iterate_shader( tokens, &ctx.iter );
 }
 
+void
+tgsi_dump(const struct tgsi_token *tokens, uint flags)
+{
+   tgsi_dump_to_file(tokens, flags, NULL);
+}
+
 struct str_dump_ctx
 {
    struct dump_ctx base;
@@ -733,6 +742,7 @@ tgsi_dump_str(
    ctx.base.indent = 0;
    ctx.base.dump_printf = &str_dump_ctx_printf;
    ctx.base.indentation = 0;
+   ctx.base.file = NULL;
 
    ctx.str = str;
    ctx.str[0] = 0;
@@ -756,6 +766,7 @@ tgsi_dump_instruction_str(
    ctx.base.indent = 0;
    ctx.base.dump_printf = &str_dump_ctx_printf;
    ctx.base.indentation = 0;
+   ctx.base.file = NULL;
 
    ctx.str = str;
    ctx.str[0] = 0;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index bc873a54ae9..7c8f92ee7bc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -32,6 +32,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 
+#include <stdio.h>
+
 #if defined __cplusplus
 extern "C" {
 #endif
@@ -44,6 +46,9 @@ tgsi_dump_str(
    size_t size);
 
 void
+tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file);
+
+void
 tgsi_dump(
    const struct tgsi_token *tokens,
    uint flags );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 44000ffdb6c..75cd0d53c5a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -735,7 +735,7 @@ static const union tgsi_exec_channel M128Vec = {
  * not lead to crashes, etc.  But when debugging, it's helpful to catch
  * them.
  */
-static INLINE void
+static inline void
 check_inf_or_nan(const union tgsi_exec_channel *chan)
 {
    assert(!util_is_inf_or_nan((chan)->f[0]));
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 208640cfd46..5d56aab2216 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -213,7 +213,7 @@ struct tgsi_sampler
  * input register files, this is the stride between two 1D
  * arrays.
  */
-#define TGSI_EXEC_MAX_INPUT_ATTRIBS PIPE_MAX_SHADER_INPUTS
+#define TGSI_EXEC_MAX_INPUT_ATTRIBS 32
 
 /* The maximum number of bytes per constant buffer.
  */
@@ -386,7 +386,7 @@ boolean
 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst);
 
 
-static INLINE void
+static inline void
 tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
 {
    mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] =
@@ -395,7 +395,7 @@ tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
 
 
 /** Set execution mask values prior to executing the shader */
-static INLINE void
+static inline void
 tgsi_set_exec_mask(struct tgsi_exec_machine *mach,
                    boolean ch0, boolean ch1, boolean ch2, boolean ch3)
 {
@@ -414,7 +414,7 @@ tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
                                const unsigned *buf_sizes);
 
 
-static INLINE int
+static inline int
 tgsi_exec_get_shader_param(enum pipe_shader_cap param)
 {
    switch(param) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 929531109e5..fb29ea0d53d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -316,7 +316,7 @@ tgsi_get_processor_name( uint processor )
  *
  * MOV and UCMP is special so return VOID
  */
-static INLINE enum tgsi_opcode_type
+static inline enum tgsi_opcode_type
 tgsi_opcode_infer_type( uint opcode )
 {
    switch (opcode) {
@@ -374,7 +374,34 @@ tgsi_opcode_infer_type( uint opcode )
    case TGSI_OPCODE_IMUL_HI:
    case TGSI_OPCODE_IBFE:
    case TGSI_OPCODE_IMSB:
+   case TGSI_OPCODE_DSEQ:
+   case TGSI_OPCODE_DSGE:
+   case TGSI_OPCODE_DSLT:
+   case TGSI_OPCODE_DSNE:
       return TGSI_TYPE_SIGNED;
+   case TGSI_OPCODE_DADD:
+   case TGSI_OPCODE_DABS:
+   case TGSI_OPCODE_DFMA:
+   case TGSI_OPCODE_DNEG:
+   case TGSI_OPCODE_DMUL:
+   case TGSI_OPCODE_DMAX:
+   case TGSI_OPCODE_DMIN:
+   case TGSI_OPCODE_DRCP:
+   case TGSI_OPCODE_DSQRT:
+   case TGSI_OPCODE_DMAD:
+   case TGSI_OPCODE_DLDEXP:
+   case TGSI_OPCODE_DFRACEXP:
+   case TGSI_OPCODE_DFRAC:
+   case TGSI_OPCODE_DRSQ:
+   case TGSI_OPCODE_DTRUNC:
+   case TGSI_OPCODE_DCEIL:
+   case TGSI_OPCODE_DFLR:
+   case TGSI_OPCODE_DROUND:
+   case TGSI_OPCODE_DSSG:
+   case TGSI_OPCODE_F2D:
+   case TGSI_OPCODE_I2D:
+   case TGSI_OPCODE_U2D:
+      return TGSI_TYPE_DOUBLE;
    default:
       return TGSI_TYPE_FLOAT;
    }
@@ -391,6 +418,7 @@ tgsi_opcode_infer_src_type( uint opcode )
    case TGSI_OPCODE_TXF:
    case TGSI_OPCODE_BREAKC:
    case TGSI_OPCODE_U2F:
+   case TGSI_OPCODE_U2D:
    case TGSI_OPCODE_UADD:
    case TGSI_OPCODE_SWITCH:
    case TGSI_OPCODE_CASE:
@@ -400,10 +428,12 @@ tgsi_opcode_infer_src_type( uint opcode )
       return TGSI_TYPE_UNSIGNED;
    case TGSI_OPCODE_IMUL_HI:
    case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_I2D:
       return TGSI_TYPE_SIGNED;
    case TGSI_OPCODE_ARL:
    case TGSI_OPCODE_ARR:
    case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_F2D:
    case TGSI_OPCODE_F2I:
    case TGSI_OPCODE_F2U:
    case TGSI_OPCODE_FSEQ:
@@ -412,6 +442,14 @@ tgsi_opcode_infer_src_type( uint opcode )
    case TGSI_OPCODE_FSNE:
    case TGSI_OPCODE_UCMP:
       return TGSI_TYPE_FLOAT;
+   case TGSI_OPCODE_D2F:
+   case TGSI_OPCODE_D2U:
+   case TGSI_OPCODE_D2I:
+   case TGSI_OPCODE_DSEQ:
+   case TGSI_OPCODE_DSGE:
+   case TGSI_OPCODE_DSLT:
+   case TGSI_OPCODE_DSNE:
+      return TGSI_TYPE_DOUBLE;
    default:
       return tgsi_opcode_infer_type(opcode);
    }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 1162b265522..0729b5d2426 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -36,7 +36,7 @@ tgsi_parse_init(
    const struct tgsi_token *tokens )
 {
    ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[0];
-   if( ctx->FullHeader.Header.HeaderSize >= 2 ) {
+   if (ctx->FullHeader.Header.HeaderSize >= 2) {
       ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[1];
    }
    else {
@@ -69,7 +69,7 @@ tgsi_parse_end_of_tokens(
  * warnings.  The warnings seem harmless on x86 but on PPC they cause
  * real failures.
  */
-static INLINE void
+static inline void
 copy_token(void *dst, const void *src)
 {
    memcpy(dst, src, 4);
@@ -113,11 +113,11 @@ tgsi_parse_token(
          next_token(ctx, &decl->Dim);
       }
 
-      if( decl->Declaration.Interpolate ) {
+      if (decl->Declaration.Interpolate) {
          next_token( ctx, &decl->Interp );
       }
 
-      if( decl->Declaration.Semantic ) {
+      if (decl->Declaration.Semantic) {
          next_token( ctx, &decl->Semantic );
       }
 
@@ -129,7 +129,7 @@ tgsi_parse_token(
          next_token(ctx, &decl->SamplerView);
       }
 
-      if( decl->Declaration.Array ) {
+      if (decl->Declaration.Array) {
          next_token(ctx, &decl->Array);
       }
 
@@ -190,21 +190,21 @@ tgsi_parse_token(
 
       if (inst->Instruction.Texture) {
          next_token( ctx, &inst->Texture);
-         for( i = 0; i < inst->Texture.NumOffsets; i++ ) {
+         for (i = 0; i < inst->Texture.NumOffsets; i++) {
             next_token( ctx, &inst->TexOffsets[i] );
          }
       }
 
       assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
 
-      for(  i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
+      for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
 
          next_token( ctx, &inst->Dst[i].Register );
 
-         if( inst->Dst[i].Register.Indirect )
+         if (inst->Dst[i].Register.Indirect)
             next_token( ctx, &inst->Dst[i].Indirect );
 
-         if( inst->Dst[i].Register.Dimension ) {
+         if (inst->Dst[i].Register.Dimension) {
             next_token( ctx, &inst->Dst[i].Dimension );
 
             /*
@@ -212,21 +212,21 @@ tgsi_parse_token(
              */
             assert( !inst->Dst[i].Dimension.Dimension );
 
-            if( inst->Dst[i].Dimension.Indirect )
+            if (inst->Dst[i].Dimension.Indirect)
                next_token( ctx, &inst->Dst[i].DimIndirect );
          }
       }
 
       assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
 
-      for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
+      for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
 
          next_token( ctx, &inst->Src[i].Register );
 
-         if( inst->Src[i].Register.Indirect )
+         if (inst->Src[i].Register.Indirect)
             next_token( ctx, &inst->Src[i].Indirect );
 
-         if( inst->Src[i].Register.Dimension ) {
+         if (inst->Src[i].Register.Dimension) {
             next_token( ctx, &inst->Src[i].Dimension );
 
             /*
@@ -234,7 +234,7 @@ tgsi_parse_token(
              */
             assert( !inst->Src[i].Dimension.Dimension );
 
-            if( inst->Src[i].Dimension.Indirect )
+            if (inst->Src[i].Dimension.Indirect)
                next_token( ctx, &inst->Src[i].DimIndirect );
          }
       }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index cd4b2afdb8b..35e1c7cfd62 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -133,7 +133,7 @@ void
 tgsi_parse_token(
    struct tgsi_parse_context *ctx );
 
-static INLINE unsigned
+static inline unsigned
 tgsi_num_tokens(const struct tgsi_token *tokens)
 {
    struct tgsi_header header;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index be4851f5dcb..d14372feb30 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -63,7 +63,7 @@ struct sanity_check_ctx
    boolean print;
 };
 
-static INLINE unsigned
+static inline unsigned
 scan_register_key(const scan_register *reg)
 {
    unsigned key = reg->file;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c
index 6b6a14f55f5..8271ea08177 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
@@ -203,7 +203,7 @@ const char *tgsi_immediate_type_names[4] =
 };
 
 
-static INLINE void
+static inline void
 tgsi_strings_check(void)
 {
    STATIC_ASSERT(Elements(tgsi_semantic_names) == TGSI_SEMANTIC_COUNT);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h
index 39d7688ab3b..ceb7c2e0f46 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_transform.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h
@@ -94,7 +94,7 @@ struct tgsi_transform_context
 /**
  * Helper for emitting temporary register declarations.
  */
-static INLINE void
+static inline void
 tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
                          unsigned index)
 {
@@ -108,7 +108,7 @@ tgsi_transform_temp_decl(struct tgsi_transform_context *ctx,
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
                           unsigned index,
                           unsigned sem_name, unsigned sem_index,
@@ -130,7 +130,7 @@ tgsi_transform_input_decl(struct tgsi_transform_context *ctx,
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
                             unsigned index)
 {
@@ -143,7 +143,7 @@ tgsi_transform_sampler_decl(struct tgsi_transform_context *ctx,
    ctx->emit_declaration(ctx, &decl);
 }
 
-static INLINE void
+static inline void
 tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx,
                                  unsigned index,
                                  unsigned target,
@@ -165,7 +165,7 @@ tgsi_transform_sampler_view_decl(struct tgsi_transform_context *ctx,
    ctx->emit_declaration(ctx, &decl);
 }
 
-static INLINE void
+static inline void
 tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx,
                               float x, float y, float z, float w)
 {
@@ -186,7 +186,7 @@ tgsi_transform_immediate_decl(struct tgsi_transform_context *ctx,
 /**
  * Helper for emitting 1-operand instructions.
  */
-static INLINE void
+static inline void
 tgsi_transform_op1_inst(struct tgsi_transform_context *ctx,
                         unsigned opcode,
                         unsigned dst_file,
@@ -211,7 +211,7 @@ tgsi_transform_op1_inst(struct tgsi_transform_context *ctx,
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_op2_inst(struct tgsi_transform_context *ctx,
                         unsigned opcode,
                         unsigned dst_file,
@@ -240,7 +240,7 @@ tgsi_transform_op2_inst(struct tgsi_transform_context *ctx,
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx,
                             unsigned opcode,
                             unsigned dst_file,
@@ -282,7 +282,7 @@ tgsi_transform_op1_swz_inst(struct tgsi_transform_context *ctx,
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx,
                             unsigned opcode,
                             unsigned dst_file,
@@ -333,7 +333,7 @@ tgsi_transform_op2_swz_inst(struct tgsi_transform_context *ctx,
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_op3_swz_inst(struct tgsi_transform_context *ctx,
                             unsigned opcode,
                             unsigned dst_file,
@@ -395,7 +395,7 @@ tgsi_transform_op3_swz_inst(struct tgsi_transform_context *ctx,
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
                          unsigned src_file,
                          unsigned src_index,
@@ -419,7 +419,7 @@ tgsi_transform_kill_inst(struct tgsi_transform_context *ctx,
 }
 
 
-static INLINE void
+static inline void
 tgsi_transform_tex_2d_inst(struct tgsi_transform_context *ctx,
                            unsigned dst_file,
                            unsigned dst_index,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 201a849ef95..3d213195090 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -1830,7 +1830,7 @@ void ureg_free_tokens( const struct tgsi_token *tokens )
 }
 
 
-static INLINE unsigned
+static inline unsigned
 pipe_shader_from_tgsi_processor(unsigned processor)
 {
    switch (processor) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index 1891b068774..0aae550d60a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -140,7 +140,7 @@ ureg_destroy( struct ureg_program * );
 /***********************************************************************
  * Convenience routine:
  */
-static INLINE void *
+static inline void *
 ureg_create_shader_with_so_and_destroy( struct ureg_program *p,
 			struct pipe_context *pipe,
 			const struct pipe_stream_output_info *so )
@@ -150,7 +150,7 @@ ureg_create_shader_with_so_and_destroy( struct ureg_program *p,
    return result;
 }
 
-static INLINE void *
+static inline void *
 ureg_create_shader_and_destroy( struct ureg_program *p,
                                 struct pipe_context *pipe )
 {
@@ -180,7 +180,7 @@ ureg_DECL_fs_input_cyl_centroid(struct ureg_program *,
                        unsigned array_id,
                        unsigned array_size);
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
                        unsigned semantic_name,
                        unsigned semantic_index,
@@ -195,7 +195,7 @@ ureg_DECL_fs_input_cyl(struct ureg_program *ureg,
                                  0, 0, 1);
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_DECL_fs_input(struct ureg_program *ureg,
                    unsigned semantic_name,
                    unsigned semantic_index,
@@ -328,7 +328,7 @@ ureg_DECL_sampler_view(struct ureg_program *,
                        unsigned return_type_w );
 
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm4f( struct ureg_program *ureg,
                        float a, float b,
                        float c, float d)
@@ -341,7 +341,7 @@ ureg_imm4f( struct ureg_program *ureg,
    return ureg_DECL_immediate( ureg, v, 4 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm3f( struct ureg_program *ureg,
                        float a, float b,
                        float c)
@@ -353,7 +353,7 @@ ureg_imm3f( struct ureg_program *ureg,
    return ureg_DECL_immediate( ureg, v, 3 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm2f( struct ureg_program *ureg,
                        float a, float b)
 {
@@ -363,7 +363,7 @@ ureg_imm2f( struct ureg_program *ureg,
    return ureg_DECL_immediate( ureg, v, 2 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm1f( struct ureg_program *ureg,
                        float a)
 {
@@ -372,7 +372,7 @@ ureg_imm1f( struct ureg_program *ureg,
    return ureg_DECL_immediate( ureg, v, 1 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm4u( struct ureg_program *ureg,
             unsigned a, unsigned b,
             unsigned c, unsigned d)
@@ -385,7 +385,7 @@ ureg_imm4u( struct ureg_program *ureg,
    return ureg_DECL_immediate_uint( ureg, v, 4 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm3u( struct ureg_program *ureg,
             unsigned a, unsigned b,
             unsigned c)
@@ -397,7 +397,7 @@ ureg_imm3u( struct ureg_program *ureg,
    return ureg_DECL_immediate_uint( ureg, v, 3 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm2u( struct ureg_program *ureg,
             unsigned a, unsigned b)
 {
@@ -407,14 +407,14 @@ ureg_imm2u( struct ureg_program *ureg,
    return ureg_DECL_immediate_uint( ureg, v, 2 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm1u( struct ureg_program *ureg,
             unsigned a)
 {
    return ureg_DECL_immediate_uint( ureg, &a, 1 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm4i( struct ureg_program *ureg,
             int a, int b,
             int c, int d)
@@ -427,7 +427,7 @@ ureg_imm4i( struct ureg_program *ureg,
    return ureg_DECL_immediate_int( ureg, v, 4 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm3i( struct ureg_program *ureg,
             int a, int b,
             int c)
@@ -439,7 +439,7 @@ ureg_imm3i( struct ureg_program *ureg,
    return ureg_DECL_immediate_int( ureg, v, 3 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm2i( struct ureg_program *ureg,
             int a, int b)
 {
@@ -449,7 +449,7 @@ ureg_imm2i( struct ureg_program *ureg,
    return ureg_DECL_immediate_int( ureg, v, 2 );
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_imm1i( struct ureg_program *ureg,
             int a)
 {
@@ -459,7 +459,7 @@ ureg_imm1i( struct ureg_program *ureg,
 /* Where the destination register has a valid file, but an empty
  * writemask.
  */
-static INLINE boolean
+static inline boolean
 ureg_dst_is_empty( struct ureg_dst dst )
 {
    return dst.File != TGSI_FILE_NULL &&
@@ -573,7 +573,7 @@ ureg_fixup_insn_size(struct ureg_program *ureg,
 
 
 #define OP00( op )                                              \
-static INLINE void ureg_##op( struct ureg_program *ureg )       \
+static inline void ureg_##op( struct ureg_program *ureg )       \
 {                                                               \
    unsigned opcode = TGSI_OPCODE_##op;                          \
    struct ureg_emit_insn_result insn;                           \
@@ -592,7 +592,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg )       \
 }
 
 #define OP01( op )                                              \
-static INLINE void ureg_##op( struct ureg_program *ureg,        \
+static inline void ureg_##op( struct ureg_program *ureg,        \
                               struct ureg_src src )             \
 {                                                               \
    unsigned opcode = TGSI_OPCODE_##op;                          \
@@ -613,7 +613,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,        \
 }
 
 #define OP00_LBL( op )                                          \
-static INLINE void ureg_##op( struct ureg_program *ureg,        \
+static inline void ureg_##op( struct ureg_program *ureg,        \
                               unsigned *label_token )           \
 {                                                               \
    unsigned opcode = TGSI_OPCODE_##op;                          \
@@ -634,7 +634,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,        \
 }
 
 #define OP01_LBL( op )                                          \
-static INLINE void ureg_##op( struct ureg_program *ureg,        \
+static inline void ureg_##op( struct ureg_program *ureg,        \
                               struct ureg_src src,              \
                               unsigned *label_token )          \
 {                                                               \
@@ -657,7 +657,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,        \
 }
 
 #define OP10( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst )                     \
 {                                                                       \
    unsigned opcode = TGSI_OPCODE_##op;                                  \
@@ -681,7 +681,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 
 
 #define OP11( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src )                     \
 {                                                                       \
@@ -706,7 +706,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 }
 
 #define OP12( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1 )                    \
@@ -733,7 +733,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 }
 
 #define OP12_TEX( op )                                                  \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               unsigned target,                          \
                               struct ureg_src src0,                     \
@@ -762,7 +762,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 }
 
 #define OP12_SAMPLE( op )                                               \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1 )                    \
@@ -791,7 +791,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 }
 
 #define OP13( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -820,7 +820,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 }
 
 #define OP13_SAMPLE( op )                                               \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -851,7 +851,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 }
 
 #define OP14_TEX( op )                                                  \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               unsigned target,                          \
                               struct ureg_src src0,                     \
@@ -884,7 +884,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 }
 
 #define OP14_SAMPLE( op )                                               \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -918,7 +918,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 
 
 #define OP14( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -950,7 +950,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 
 
 #define OP15( op )                                                      \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -983,7 +983,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 }
 
 #define OP15_SAMPLE( op )                                               \
-static INLINE void ureg_##op( struct ureg_program *ureg,                \
+static inline void ureg_##op( struct ureg_program *ureg,                \
                               struct ureg_dst dst,                      \
                               struct ureg_src src0,                     \
                               struct ureg_src src1,                     \
@@ -1026,7 +1026,7 @@ static INLINE void ureg_##op( struct ureg_program *ureg,                \
 /***********************************************************************
  * Inline helpers for manipulating register structs:
  */
-static INLINE struct ureg_src 
+static inline struct ureg_src 
 ureg_negate( struct ureg_src reg )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1034,7 +1034,7 @@ ureg_negate( struct ureg_src reg )
    return reg;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_abs( struct ureg_src reg )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1043,7 +1043,7 @@ ureg_abs( struct ureg_src reg )
    return reg;
 }
 
-static INLINE struct ureg_src 
+static inline struct ureg_src 
 ureg_swizzle( struct ureg_src reg, 
               int x, int y, int z, int w )
 {
@@ -1065,13 +1065,13 @@ ureg_swizzle( struct ureg_src reg,
    return reg;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_scalar( struct ureg_src reg, int x )
 {
    return ureg_swizzle(reg, x, x, x, x);
 }
 
-static INLINE struct ureg_dst 
+static inline struct ureg_dst 
 ureg_writemask( struct ureg_dst reg,
                 unsigned writemask )
 {
@@ -1080,7 +1080,7 @@ ureg_writemask( struct ureg_dst reg,
    return reg;
 }
 
-static INLINE struct ureg_dst 
+static inline struct ureg_dst 
 ureg_saturate( struct ureg_dst reg )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1088,7 +1088,7 @@ ureg_saturate( struct ureg_dst reg )
    return reg;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_predicate(struct ureg_dst reg,
                boolean negate,
                unsigned swizzle_x,
@@ -1106,7 +1106,7 @@ ureg_predicate(struct ureg_dst reg,
    return reg;
 }
 
-static INLINE struct ureg_dst 
+static inline struct ureg_dst 
 ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1118,7 +1118,7 @@ ureg_dst_indirect( struct ureg_dst reg, struct ureg_src addr )
    return reg;
 }
 
-static INLINE struct ureg_src 
+static inline struct ureg_src 
 ureg_src_indirect( struct ureg_src reg, struct ureg_src addr )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1130,7 +1130,7 @@ ureg_src_indirect( struct ureg_src reg, struct ureg_src addr )
    return reg;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_dst_dimension( struct ureg_dst reg, int index )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1140,7 +1140,7 @@ ureg_dst_dimension( struct ureg_dst reg, int index )
    return reg;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_src_dimension( struct ureg_src reg, int index )
 {
    assert(reg.File != TGSI_FILE_NULL);
@@ -1150,7 +1150,7 @@ ureg_src_dimension( struct ureg_src reg, int index )
    return reg;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_dst_dimension_indirect( struct ureg_dst reg, struct ureg_src addr,
                              int index )
 {
@@ -1164,7 +1164,7 @@ ureg_dst_dimension_indirect( struct ureg_dst reg, struct ureg_src addr,
    return reg;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr,
                              int index )
 {
@@ -1178,21 +1178,21 @@ ureg_src_dimension_indirect( struct ureg_src reg, struct ureg_src addr,
    return reg;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_src_array_offset(struct ureg_src reg, int offset)
 {
    reg.Index += offset;
    return reg;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_dst_array_offset( struct ureg_dst reg, int offset )
 {
    reg.Index += offset;
    return reg;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_dst_array_register(unsigned file,
                         unsigned index,
                         unsigned array_id)
@@ -1224,14 +1224,14 @@ ureg_dst_array_register(unsigned file,
    return dst;
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_dst_register(unsigned file,
                   unsigned index)
 {
    return ureg_dst_array_register(file, index, 0);
 }
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_dst( struct ureg_src src )
 {
    struct ureg_dst dst;
@@ -1265,7 +1265,7 @@ ureg_dst( struct ureg_src src )
    return dst;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_src_array_register(unsigned file,
                         unsigned index,
                         unsigned array_id)
@@ -1295,14 +1295,14 @@ ureg_src_array_register(unsigned file,
    return src;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_src_register(unsigned file,
                   unsigned index)
 {
    return ureg_src_array_register(file, index, 0);
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_src( struct ureg_dst dst )
 {
    struct ureg_src src;
@@ -1332,7 +1332,7 @@ ureg_src( struct ureg_dst dst )
 
 
 
-static INLINE struct ureg_dst
+static inline struct ureg_dst
 ureg_dst_undef( void )
 {
    struct ureg_dst dst;
@@ -1362,7 +1362,7 @@ ureg_dst_undef( void )
    return dst;
 }
 
-static INLINE struct ureg_src
+static inline struct ureg_src
 ureg_src_undef( void )
 {
    struct ureg_src src;
@@ -1390,13 +1390,13 @@ ureg_src_undef( void )
    return src;
 }
 
-static INLINE boolean
+static inline boolean
 ureg_src_is_undef( struct ureg_src src )
 {
    return src.File == TGSI_FILE_NULL;
 }
 
-static INLINE boolean
+static inline boolean
 ureg_dst_is_undef( struct ureg_dst dst )
 {
    return dst.File == TGSI_FILE_NULL;
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
index 7fe8ff8145f..d77561aa7ce 100644
--- a/src/gallium/auxiliary/translate/translate.h
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -130,12 +130,12 @@ struct translate *translate_create( const struct translate_key *key );
 
 boolean translate_is_output_format_supported(enum pipe_format format);
 
-static INLINE int translate_keysize( const struct translate_key *key )
+static inline int translate_keysize( const struct translate_key *key )
 {
    return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element);
 }
 
-static INLINE int translate_key_compare( const struct translate_key *a,
+static inline int translate_key_compare( const struct translate_key *a,
                                          const struct translate_key *b )
 {
    int keysize_a = translate_keysize(a);
@@ -148,7 +148,7 @@ static INLINE int translate_key_compare( const struct translate_key *a,
 }
 
 
-static INLINE void translate_key_sanitize( struct translate_key *a )
+static inline void translate_key_sanitize( struct translate_key *a )
 {
    int keysize = translate_keysize(a);
    char *ptr = (char *)a;
diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c
index bb8bdcb58c4..2bed02a454b 100644
--- a/src/gallium/auxiliary/translate/translate_cache.c
+++ b/src/gallium/auxiliary/translate/translate_cache.c
@@ -49,7 +49,7 @@ struct translate_cache * translate_cache_create( void )
 }
 
 
-static INLINE void delete_translates(struct translate_cache *cache)
+static inline void delete_translates(struct translate_cache *cache)
 {
    struct cso_hash *hash = cache->hash;
    struct cso_hash_iter iter = cso_hash_first_node(hash);
@@ -70,14 +70,14 @@ void translate_cache_destroy(struct translate_cache *cache)
 }
 
 
-static INLINE unsigned translate_hash_key_size(struct translate_key *key)
+static inline unsigned translate_hash_key_size(struct translate_key *key)
 {
    unsigned size = sizeof(struct translate_key) -
                    sizeof(struct translate_element) * (TRANSLATE_MAX_ATTRIBS - key->nr_elements);
    return size;
 }
 
-static INLINE unsigned create_key(struct translate_key *key)
+static inline unsigned create_key(struct translate_key *key)
 {
    unsigned hash_key;
    unsigned size = translate_hash_key_size(key);
diff --git a/src/gallium/auxiliary/util/u_bitmask.c b/src/gallium/auxiliary/util/u_bitmask.c
index 23c93a3ebcb..b19be29a5a4 100644
--- a/src/gallium/auxiliary/util/u_bitmask.c
+++ b/src/gallium/auxiliary/util/u_bitmask.c
@@ -85,7 +85,7 @@ util_bitmask_create(void)
 /**
  * Resize the bitmask if necessary 
  */
-static INLINE boolean
+static inline boolean
 util_bitmask_resize(struct util_bitmask *bm,
                     unsigned minimum_index)
 {
@@ -131,7 +131,7 @@ util_bitmask_resize(struct util_bitmask *bm,
 /**
  * Lazily update the filled.
  */
-static INLINE void
+static inline void
 util_bitmask_filled_set(struct util_bitmask *bm,
                         unsigned index)
 {
@@ -144,7 +144,7 @@ util_bitmask_filled_set(struct util_bitmask *bm,
    }
 }
 
-static INLINE void
+static inline void
 util_bitmask_filled_unset(struct util_bitmask *bm,
                           unsigned index)
 {
diff --git a/src/gallium/auxiliary/util/u_blend.h b/src/gallium/auxiliary/util/u_blend.h
index 2485c34d418..4f969778972 100644
--- a/src/gallium/auxiliary/util/u_blend.h
+++ b/src/gallium/auxiliary/util/u_blend.h
@@ -9,7 +9,7 @@
  * garbage that's there. Return a blend factor that will take that into
  * account.
  */
-static INLINE int
+static inline int
 util_blend_dst_alpha_to_one(int factor)
 {
    switch (factor) {
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index e3f30557a03..9737c940936 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -158,7 +158,7 @@ util_destroy_blit(struct blit_state *ctx)
 /**
  * Helper function to set the fragment shaders.
  */
-static INLINE void
+static inline void
 set_fragment_shader(struct blit_state *ctx, uint writemask,
                     enum pipe_format format,
                     enum pipe_texture_target pipe_tex)
@@ -194,7 +194,7 @@ set_fragment_shader(struct blit_state *ctx, uint writemask,
 /**
  * Helper function to set the vertex shader.
  */
-static INLINE void
+static inline void
 set_vertex_shader(struct blit_state *ctx)
 {
    /* vertex shader - still required to provide the linkage between
@@ -546,8 +546,8 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_save_rasterizer(ctx->cso);
    cso_save_sample_mask(ctx->cso);
    cso_save_min_samples(ctx->cso);
-   cso_save_samplers(ctx->cso, PIPE_SHADER_FRAGMENT);
-   cso_save_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT);
+   cso_save_fragment_samplers(ctx->cso);
+   cso_save_fragment_sampler_views(ctx->cso);
    cso_save_stream_outputs(ctx->cso);
    cso_save_viewport(ctx->cso);
    cso_save_framebuffer(ctx->cso);
@@ -572,8 +572,10 @@ util_blit_pixels_tex(struct blit_state *ctx,
    ctx->sampler.normalized_coords = normalized;
    ctx->sampler.min_img_filter = filter;
    ctx->sampler.mag_img_filter = filter;
-   cso_single_sampler(ctx->cso, PIPE_SHADER_FRAGMENT, 0, &ctx->sampler);
-   cso_single_sampler_done(ctx->cso, PIPE_SHADER_FRAGMENT);
+   {
+      const struct pipe_sampler_state *samplers[] = {&ctx->sampler};
+      cso_set_samplers(ctx->cso, PIPE_SHADER_FRAGMENT, 1, samplers);
+   }
 
    /* viewport */
    ctx->viewport.scale[0] = 0.5f * dst->width;
@@ -628,8 +630,8 @@ util_blit_pixels_tex(struct blit_state *ctx,
    cso_restore_rasterizer(ctx->cso);
    cso_restore_sample_mask(ctx->cso);
    cso_restore_min_samples(ctx->cso);
-   cso_restore_samplers(ctx->cso, PIPE_SHADER_FRAGMENT);
-   cso_restore_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT);
+   cso_restore_fragment_samplers(ctx->cso);
+   cso_restore_fragment_sampler_views(ctx->cso);
    cso_restore_viewport(ctx->cso);
    cso_restore_framebuffer(ctx->cso);
    cso_restore_fragment_shader(ctx->cso);
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index b5ef9a23966..85206eab1a7 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -938,7 +938,7 @@ static void *blitter_get_fs_texfetch_col(struct blitter_context_priv *ctx,
    }
 }
 
-static INLINE
+static inline
 void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
                                     enum pipe_texture_target target,
                                     unsigned nr_samples)
@@ -976,7 +976,7 @@ void *blitter_get_fs_texfetch_depth(struct blitter_context_priv *ctx,
    }
 }
 
-static INLINE
+static inline
 void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx,
                                            enum pipe_texture_target target,
                                            unsigned nr_samples)
@@ -1014,7 +1014,7 @@ void *blitter_get_fs_texfetch_depthstencil(struct blitter_context_priv *ctx,
    }
 }
 
-static INLINE
+static inline
 void *blitter_get_fs_texfetch_stencil(struct blitter_context_priv *ctx,
                                       enum pipe_texture_target target,
                                       unsigned nr_samples)
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index 93b0e513bd0..0cd173d6284 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -143,7 +143,7 @@ void util_blitter_cache_all_shaders(struct blitter_context *blitter);
 /**
  * Return the pipe context associated with a blitter context.
  */
-static INLINE
+static inline
 struct pipe_context *util_blitter_get_pipe(struct blitter_context *blitter)
 {
    return blitter->pipe;
@@ -371,77 +371,77 @@ void util_blitter_custom_resolve_color(struct blitter_context *blitter,
  *
  * States not listed here are not affected by util_blitter. */
 
-static INLINE
+static inline
 void util_blitter_save_blend(struct blitter_context *blitter,
                              void *state)
 {
    blitter->saved_blend_state = state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_depth_stencil_alpha(struct blitter_context *blitter,
                                            void *state)
 {
    blitter->saved_dsa_state = state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_vertex_elements(struct blitter_context *blitter,
                                        void *state)
 {
    blitter->saved_velem_state = state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_stencil_ref(struct blitter_context *blitter,
                                    const struct pipe_stencil_ref *state)
 {
    blitter->saved_stencil_ref = *state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_rasterizer(struct blitter_context *blitter,
                                   void *state)
 {
    blitter->saved_rs_state = state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_fragment_shader(struct blitter_context *blitter,
                                        void *fs)
 {
    blitter->saved_fs = fs;
 }
 
-static INLINE
+static inline
 void util_blitter_save_vertex_shader(struct blitter_context *blitter,
                                      void *vs)
 {
    blitter->saved_vs = vs;
 }
 
-static INLINE
+static inline
 void util_blitter_save_geometry_shader(struct blitter_context *blitter,
                                        void *gs)
 {
    blitter->saved_gs = gs;
 }
 
-static INLINE void
+static inline void
 util_blitter_save_tessctrl_shader(struct blitter_context *blitter,
                                   void *sh)
 {
    blitter->saved_tcs = sh;
 }
 
-static INLINE void
+static inline void
 util_blitter_save_tesseval_shader(struct blitter_context *blitter,
                                   void *sh)
 {
    blitter->saved_tes = sh;
 }
 
-static INLINE
+static inline
 void util_blitter_save_framebuffer(struct blitter_context *blitter,
                                    const struct pipe_framebuffer_state *state)
 {
@@ -449,21 +449,21 @@ void util_blitter_save_framebuffer(struct blitter_context *blitter,
    util_copy_framebuffer_state(&blitter->saved_fb_state, state);
 }
 
-static INLINE
+static inline
 void util_blitter_save_viewport(struct blitter_context *blitter,
                                 struct pipe_viewport_state *state)
 {
    blitter->saved_viewport = *state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_scissor(struct blitter_context *blitter,
                                struct pipe_scissor_state *state)
 {
    blitter->saved_scissor = *state;
 }
 
-static INLINE
+static inline
 void util_blitter_save_fragment_sampler_states(
                   struct blitter_context *blitter,
                   unsigned num_sampler_states,
@@ -476,7 +476,7 @@ void util_blitter_save_fragment_sampler_states(
           num_sampler_states * sizeof(void *));
 }
 
-static INLINE void
+static inline void
 util_blitter_save_fragment_sampler_views(struct blitter_context *blitter,
                                          unsigned num_views,
                                          struct pipe_sampler_view **views)
@@ -490,7 +490,7 @@ util_blitter_save_fragment_sampler_views(struct blitter_context *blitter,
                                   views[i]);
 }
 
-static INLINE void
+static inline void
 util_blitter_save_vertex_buffer_slot(struct blitter_context *blitter,
                                      struct pipe_vertex_buffer *vertex_buffers)
 {
@@ -500,7 +500,7 @@ util_blitter_save_vertex_buffer_slot(struct blitter_context *blitter,
           sizeof(struct pipe_vertex_buffer));
 }
 
-static INLINE void
+static inline void
 util_blitter_save_so_targets(struct blitter_context *blitter,
                              unsigned num_targets,
                              struct pipe_stream_output_target **targets)
@@ -514,7 +514,7 @@ util_blitter_save_so_targets(struct blitter_context *blitter,
                                targets[i]);
 }
 
-static INLINE void
+static inline void
 util_blitter_save_sample_mask(struct blitter_context *blitter,
                               unsigned sample_mask)
 {
@@ -522,7 +522,7 @@ util_blitter_save_sample_mask(struct blitter_context *blitter,
    blitter->saved_sample_mask = sample_mask;
 }
 
-static INLINE void
+static inline void
 util_blitter_save_render_condition(struct blitter_context *blitter,
                                    struct pipe_query *query,
                                    boolean condition,
diff --git a/src/gallium/auxiliary/util/u_box.h b/src/gallium/auxiliary/util/u_box.h
index 520a3d596cb..66cf989a830 100644
--- a/src/gallium/auxiliary/util/u_box.h
+++ b/src/gallium/auxiliary/util/u_box.h
@@ -4,7 +4,7 @@
 #include "pipe/p_state.h"
 #include "util/u_math.h"
 
-static INLINE
+static inline
 void u_box_1d( unsigned x,
 	       unsigned w,
 	       struct pipe_box *box )
@@ -17,7 +17,7 @@ void u_box_1d( unsigned x,
    box->depth = 1;
 }
 
-static INLINE
+static inline
 void u_box_2d( unsigned x,
 	       unsigned y,
 	       unsigned w,
@@ -32,7 +32,7 @@ void u_box_2d( unsigned x,
    box->depth = 1;
 }
 
-static INLINE
+static inline
 void u_box_origin_2d( unsigned w,
 		      unsigned h,
 		      struct pipe_box *box )
@@ -45,7 +45,7 @@ void u_box_origin_2d( unsigned w,
    box->depth = 1;
 }
 
-static INLINE
+static inline
 void u_box_2d_zslice( unsigned x,
 		      unsigned y,
 		      unsigned z,
@@ -61,7 +61,7 @@ void u_box_2d_zslice( unsigned x,
    box->depth = 1;
 }
 
-static INLINE
+static inline
 void u_box_3d( unsigned x,
 	       unsigned y,
 	       unsigned z,
@@ -86,7 +86,7 @@ void u_box_3d( unsigned x,
  *          3 if both width and height have been reduced.
  * Aliasing permitted.
  */
-static INLINE int
+static inline int
 u_box_clip_2d(struct pipe_box *dst,
               const struct pipe_box *box, int w, int h)
 {
@@ -129,14 +129,14 @@ u_box_clip_2d(struct pipe_box *dst,
    return res;
 }
 
-static INLINE int64_t
+static inline int64_t
 u_box_volume_3d(const struct pipe_box *box)
 {
    return (int64_t)box->width * box->height * box->depth;
 }
 
 /* Aliasing of @dst permitted. */
-static INLINE void
+static inline void
 u_box_union_2d(struct pipe_box *dst,
                const struct pipe_box *a, const struct pipe_box *b)
 {
@@ -148,7 +148,7 @@ u_box_union_2d(struct pipe_box *dst,
 }
 
 /* Aliasing of @dst permitted. */
-static INLINE void
+static inline void
 u_box_union_3d(struct pipe_box *dst,
                const struct pipe_box *a, const struct pipe_box *b)
 {
@@ -161,7 +161,7 @@ u_box_union_3d(struct pipe_box *dst,
    dst->depth = MAX2(a->z + a->depth, b->z + b->depth) - dst->z;
 }
 
-static INLINE boolean
+static inline boolean
 u_box_test_intersection_2d(const struct pipe_box *a,
                            const struct pipe_box *b)
 {
@@ -185,7 +185,7 @@ u_box_test_intersection_2d(const struct pipe_box *a,
    return TRUE;
 }
 
-static INLINE void
+static inline void
 u_box_minify_2d(struct pipe_box *dst,
                 const struct pipe_box *src, unsigned l)
 {
diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c
index 9395c66f2f8..da0856981eb 100644
--- a/src/gallium/auxiliary/util/u_cache.c
+++ b/src/gallium/auxiliary/util/u_cache.c
@@ -155,7 +155,7 @@ util_cache_entry_get(struct util_cache *cache,
    return NULL;
 }
 
-static INLINE void
+static inline void
 util_cache_entry_destroy(struct util_cache *cache,
                          struct util_cache_entry *entry)
 {
diff --git a/src/gallium/auxiliary/util/u_clear.h b/src/gallium/auxiliary/util/u_clear.h
index af557be00bd..864d1302b4f 100644
--- a/src/gallium/auxiliary/util/u_clear.h
+++ b/src/gallium/auxiliary/util/u_clear.h
@@ -37,7 +37,7 @@
  * Clear the given buffers to the specified values.
  * No masking, no scissor (clear entire buffer).
  */
-static INLINE void
+static inline void
 util_clear(struct pipe_context *pipe,
            struct pipe_framebuffer_state *framebuffer, unsigned buffers,
            const union pipe_color_union *color, double depth, unsigned stencil)
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 23ab46c54bc..d1f9e978682 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -179,7 +179,7 @@ static int has_cpuid(void)
  * @sa cpuid.h included in gcc-4.3 onwards.
  * @sa http://msdn.microsoft.com/en-us/library/hskdteyh.aspx
  */
-static INLINE void
+static inline void
 cpuid(uint32_t ax, uint32_t *p)
 {
 #if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86)
@@ -216,7 +216,7 @@ cpuid(uint32_t ax, uint32_t *p)
  * @sa cpuid.h included in gcc-4.4 onwards.
  * @sa http://msdn.microsoft.com/en-us/library/hskdteyh%28v=vs.90%29.aspx
  */
-static INLINE void
+static inline void
 cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p)
 {
 #if (defined(PIPE_CC_GCC) || defined(PIPE_CC_SUNPRO)) && defined(PIPE_ARCH_X86)
@@ -250,7 +250,7 @@ cpuid_count(uint32_t ax, uint32_t cx, uint32_t *p)
 }
 
 
-static INLINE uint64_t xgetbv(void)
+static inline uint64_t xgetbv(void)
 {
 #if defined(PIPE_CC_GCC)
    uint32_t eax, edx;
@@ -272,7 +272,7 @@ static INLINE uint64_t xgetbv(void)
 
 
 #if defined(PIPE_ARCH_X86)
-PIPE_ALIGN_STACK static INLINE boolean sse2_has_daz(void)
+PIPE_ALIGN_STACK static inline boolean sse2_has_daz(void)
 {
    struct {
       uint32_t pad1[7];
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 2d2d049b205..b4503deb8f6 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -256,12 +256,12 @@ static boolean str_has_option(const char *str, const char *name)
    return FALSE;
 }
 
-unsigned long
+uint64_t
 debug_get_flags_option(const char *name, 
                        const struct debug_named_value *flags,
-                       unsigned long dfault)
+                       uint64_t dfault)
 {
-   unsigned long result;
+   uint64_t result;
    const char *str;
    const struct debug_named_value *orig = flags;
    unsigned namealign = 0;
@@ -276,7 +276,7 @@ debug_get_flags_option(const char *name,
          namealign = MAX2(namealign, strlen(flags->name));
       for (flags = orig; flags->name; ++flags)
          _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
-                      (int)sizeof(unsigned long)*CHAR_BIT/4, flags->value,
+                      (int)sizeof(uint64_t)*CHAR_BIT/4, flags->value,
                       flags->desc ? " " : "", flags->desc ? flags->desc : "");
    }
    else {
@@ -758,7 +758,8 @@ debug_print_bind_flags(const char *msg, unsigned usage)
       DEBUG_NAMED_VALUE(PIPE_BIND_CURSOR),
       DEBUG_NAMED_VALUE(PIPE_BIND_CUSTOM),
       DEBUG_NAMED_VALUE(PIPE_BIND_GLOBAL),
-      DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_RESOURCE),
+      DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_BUFFER),
+      DEBUG_NAMED_VALUE(PIPE_BIND_SHADER_IMAGE),
       DEBUG_NAMED_VALUE(PIPE_BIND_COMPUTE_RESOURCE),
       DEBUG_NAMED_VALUE(PIPE_BIND_COMMAND_ARGS_BUFFER),
       DEBUG_NAMED_VALUE(PIPE_BIND_SCANOUT),
diff --git a/src/gallium/auxiliary/util/u_debug.h b/src/gallium/auxiliary/util/u_debug.h
index 3b2255244a7..926063a1918 100644
--- a/src/gallium/auxiliary/util/u_debug.h
+++ b/src/gallium/auxiliary/util/u_debug.h
@@ -58,7 +58,7 @@ extern "C" {
 void _debug_vprintf(const char *format, va_list ap);
    
 
-static INLINE void
+static inline void
 _debug_printf(const char *format, ...)
 {
    va_list ap;
@@ -78,10 +78,10 @@ _debug_printf(const char *format, ...)
  * that is guaranteed to be printed in all platforms)
  */
 #if !defined(PIPE_OS_HAIKU)
-static INLINE void
+static inline void
 debug_printf(const char *format, ...) _util_printf_format(1,2);
 
-static INLINE void
+static inline void
 debug_printf(const char *format, ...)
 {
 #ifdef DEBUG
@@ -269,7 +269,7 @@ void _debug_assert_fail(const char *expr,
 struct debug_named_value
 {
    const char *name;
-   unsigned long value;
+   uint64_t value;
    const char *desc;
 };
 
@@ -377,10 +377,10 @@ debug_get_bool_option(const char *name, boolean dfault);
 long
 debug_get_num_option(const char *name, long dfault);
 
-unsigned long
+uint64_t
 debug_get_flags_option(const char *name, 
                        const struct debug_named_value *flags,
-                       unsigned long dfault);
+                       uint64_t dfault);
 
 #define DEBUG_GET_ONCE_BOOL_OPTION(sufix, name, dfault) \
 static boolean \
diff --git a/src/gallium/auxiliary/util/u_debug_describe.c b/src/gallium/auxiliary/util/u_debug_describe.c
index df73ed83ef6..f428d22d205 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.c
+++ b/src/gallium/auxiliary/util/u_debug_describe.c
@@ -81,6 +81,15 @@ debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr)
 }
 
 void
+debug_describe_image_view(char* buf, const struct pipe_image_view *ptr)
+{
+   char res[128];
+   debug_describe_resource(res, ptr->resource);
+   util_sprintf(buf, "pipe_image_view<%s,%s>", res,
+                util_format_short_name(ptr->format));
+}
+
+void
 debug_describe_so_target(char* buf,
                          const struct pipe_stream_output_target *ptr)
 {
diff --git a/src/gallium/auxiliary/util/u_debug_describe.h b/src/gallium/auxiliary/util/u_debug_describe.h
index 4f7882b0b37..2172ecb4395 100644
--- a/src/gallium/auxiliary/util/u_debug_describe.h
+++ b/src/gallium/auxiliary/util/u_debug_describe.h
@@ -35,12 +35,14 @@ struct pipe_reference;
 struct pipe_resource;
 struct pipe_surface;
 struct pipe_sampler_view;
+struct pipe_image_view;
 
 /* a 256-byte buffer is necessary and sufficient */
 void debug_describe_reference(char* buf, const struct pipe_reference*ptr);
 void debug_describe_resource(char* buf, const struct pipe_resource *ptr);
 void debug_describe_surface(char* buf, const struct pipe_surface *ptr);
 void debug_describe_sampler_view(char* buf, const struct pipe_sampler_view *ptr);
+void debug_describe_image_view(char* buf, const struct pipe_image_view *ptr);
 void debug_describe_so_target(char* buf,
                               const struct pipe_stream_output_target *ptr);
 
diff --git a/src/gallium/auxiliary/util/u_debug_memory.c b/src/gallium/auxiliary/util/u_debug_memory.c
index 747837cd148..3e7ecfa79f3 100644
--- a/src/gallium/auxiliary/util/u_debug_memory.c
+++ b/src/gallium/auxiliary/util/u_debug_memory.c
@@ -92,7 +92,7 @@ pipe_static_mutex(list_mutex);
 static unsigned long last_no = 0;
 
 
-static INLINE struct debug_memory_header *
+static inline struct debug_memory_header *
 header_from_data(void *data)
 {
    if(data)
@@ -101,7 +101,7 @@ header_from_data(void *data)
       return NULL;
 }
 
-static INLINE void *
+static inline void *
 data_from_header(struct debug_memory_header *hdr)
 {
    if(hdr)
@@ -110,7 +110,7 @@ data_from_header(struct debug_memory_header *hdr)
       return NULL;
 }
 
-static INLINE struct debug_memory_footer *
+static inline struct debug_memory_footer *
 footer_from_header(struct debug_memory_header *hdr)
 {
    if(hdr)
diff --git a/src/gallium/auxiliary/util/u_debug_refcnt.h b/src/gallium/auxiliary/util/u_debug_refcnt.h
index c02fba27ddf..1f9218fec9a 100644
--- a/src/gallium/auxiliary/util/u_debug_refcnt.h
+++ b/src/gallium/auxiliary/util/u_debug_refcnt.h
@@ -42,7 +42,7 @@ extern int debug_refcnt_state;
 
 void debug_reference_slowpath(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change);
 
-static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+static inline void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
 {
    if (debug_refcnt_state >= 0)
       debug_reference_slowpath(p, get_desc, change);
@@ -50,7 +50,7 @@ static INLINE void debug_reference(const struct pipe_reference* p, debug_referen
 
 #else
 
-static INLINE void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
+static inline void debug_reference(const struct pipe_reference* p, debug_reference_descriptor get_desc, int change)
 {
 }
 
diff --git a/src/gallium/auxiliary/util/u_debug_symbol.c b/src/gallium/auxiliary/util/u_debug_symbol.c
index 542493252ce..10efdd593e5 100644
--- a/src/gallium/auxiliary/util/u_debug_symbol.c
+++ b/src/gallium/auxiliary/util/u_debug_symbol.c
@@ -146,7 +146,7 @@ DBGHELP_DISPATCH(SymGetLineFromAddr64,
 #undef DBGHELP_DISPATCH
 
 
-static INLINE boolean
+static inline boolean
 debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size)
 {
    DWORD64 dwAddr = (DWORD64)(uintptr_t)addr;
@@ -227,7 +227,7 @@ debug_symbol_name_dbghelp(const void *addr, char* buf, unsigned size)
  *
  * To fix this, post-process the output with tools/addr2line.sh
  */
-static INLINE boolean
+static inline boolean
 debug_symbol_name_glibc(const void *addr, char* buf, unsigned size)
 {
    char** syms = backtrace_symbols((void**)&addr, 1);
diff --git a/src/gallium/auxiliary/util/u_dirty_surfaces.h b/src/gallium/auxiliary/util/u_dirty_surfaces.h
index d31f8b9170a..ccde8a8c115 100644
--- a/src/gallium/auxiliary/util/u_dirty_surfaces.h
+++ b/src/gallium/auxiliary/util/u_dirty_surfaces.h
@@ -47,13 +47,13 @@ struct util_dirty_surface
    struct list_head dirty_list;
 };
 
-static INLINE void
+static inline void
 util_dirty_surfaces_init(struct util_dirty_surfaces *ds)
 {
    LIST_INITHEAD(&ds->dirty_list);
 }
 
-static INLINE void
+static inline void
 util_dirty_surfaces_use_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, util_dirty_surface_flush_t flush)
 {
    struct list_head *p, *next;
@@ -66,7 +66,7 @@ util_dirty_surfaces_use_for_sampling(struct pipe_context *pipe, struct util_dirt
    }
 }
 
-static INLINE void
+static inline void
 util_dirty_surfaces_use_levels_for_sampling(struct pipe_context *pipe, struct util_dirty_surfaces *dss, unsigned first, unsigned last, util_dirty_surface_flush_t flush)
 {
    struct list_head *p, *next;
@@ -82,7 +82,7 @@ util_dirty_surfaces_use_levels_for_sampling(struct pipe_context *pipe, struct ut
    }
 }
 
-static INLINE void
+static inline void
 util_dirty_surfaces_use_for_sampling_with(struct pipe_context *pipe, struct util_dirty_surfaces *dss, struct pipe_sampler_view *psv, struct pipe_sampler_state *pss, util_dirty_surface_flush_t flush)
 {
    if(!LIST_IS_EMPTY(&dss->dirty_list))
@@ -90,26 +90,26 @@ util_dirty_surfaces_use_for_sampling_with(struct pipe_context *pipe, struct util
 						  MIN2((unsigned)ceilf(pss->max_lod) + psv->u.tex.first_level, psv->u.tex.last_level), flush);
 }
 
-static INLINE void
+static inline void
 util_dirty_surface_init(struct util_dirty_surface *ds)
 {
    LIST_INITHEAD(&ds->dirty_list);
 }
 
-static INLINE boolean
+static inline boolean
 util_dirty_surface_is_dirty(struct util_dirty_surface *ds)
 {
    return !LIST_IS_EMPTY(&ds->dirty_list);
 }
 
-static INLINE void
+static inline void
 util_dirty_surface_set_dirty(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds)
 {
    if(LIST_IS_EMPTY(&ds->dirty_list))
       LIST_ADDTAIL(&ds->dirty_list, &dss->dirty_list);
 }
 
-static INLINE void
+static inline void
 util_dirty_surface_set_clean(struct util_dirty_surfaces *dss, struct util_dirty_surface *ds)
 {
    if(!LIST_IS_EMPTY(&ds->dirty_list))
diff --git a/src/gallium/auxiliary/util/u_draw.h b/src/gallium/auxiliary/util/u_draw.h
index 9fc3e9924e1..5c0880f6ce4 100644
--- a/src/gallium/auxiliary/util/u_draw.h
+++ b/src/gallium/auxiliary/util/u_draw.h
@@ -39,7 +39,7 @@ extern "C" {
 #endif
 
 
-static INLINE void
+static inline void
 util_draw_init_info(struct pipe_draw_info *info)
 {
    memset(info, 0, sizeof(*info));
@@ -48,7 +48,7 @@ util_draw_init_info(struct pipe_draw_info *info)
 }
 
 
-static INLINE void
+static inline void
 util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count)
 {
    struct pipe_draw_info info;
@@ -63,7 +63,7 @@ util_draw_arrays(struct pipe_context *pipe, uint mode, uint start, uint count)
    pipe->draw_vbo(pipe, &info);
 }
 
-static INLINE void
+static inline void
 util_draw_elements(struct pipe_context *pipe, int index_bias,
                    uint mode, uint start, uint count)
 {
@@ -79,7 +79,7 @@ util_draw_elements(struct pipe_context *pipe, int index_bias,
    pipe->draw_vbo(pipe, &info);
 }
 
-static INLINE void
+static inline void
 util_draw_arrays_instanced(struct pipe_context *pipe,
                            uint mode, uint start, uint count,
                            uint start_instance,
@@ -99,7 +99,7 @@ util_draw_arrays_instanced(struct pipe_context *pipe,
    pipe->draw_vbo(pipe, &info);
 }
 
-static INLINE void
+static inline void
 util_draw_elements_instanced(struct pipe_context *pipe,
                              int index_bias,
                              uint mode, uint start, uint count,
@@ -120,7 +120,7 @@ util_draw_elements_instanced(struct pipe_context *pipe,
    pipe->draw_vbo(pipe, &info);
 }
 
-static INLINE void
+static inline void
 util_draw_range_elements(struct pipe_context *pipe,
                          int index_bias,
                          uint min_index,
diff --git a/src/gallium/auxiliary/util/u_dual_blend.h b/src/gallium/auxiliary/util/u_dual_blend.h
index e31d43c18bd..9450800f715 100644
--- a/src/gallium/auxiliary/util/u_dual_blend.h
+++ b/src/gallium/auxiliary/util/u_dual_blend.h
@@ -3,7 +3,7 @@
 
 #include "pipe/p_state.h"
 
-static INLINE boolean util_blend_factor_is_dual_src(int factor)
+static inline boolean util_blend_factor_is_dual_src(int factor)
 {
    return (factor == PIPE_BLENDFACTOR_SRC1_COLOR) ||
           (factor == PIPE_BLENDFACTOR_SRC1_ALPHA) ||
@@ -11,7 +11,7 @@ static INLINE boolean util_blend_factor_is_dual_src(int factor)
           (factor == PIPE_BLENDFACTOR_INV_SRC1_ALPHA);
 }
 
-static INLINE boolean util_blend_state_is_dual(const struct pipe_blend_state *blend, 
+static inline boolean util_blend_state_is_dual(const struct pipe_blend_state *blend, 
 				  int index)
 {
    if (util_blend_factor_is_dual_src(blend->rt[index].rgb_src_factor) ||
diff --git a/src/gallium/auxiliary/util/u_dump.h b/src/gallium/auxiliary/util/u_dump.h
index 58e7dfd8244..2598851152b 100644
--- a/src/gallium/auxiliary/util/u_dump.h
+++ b/src/gallium/auxiliary/util/u_dump.h
@@ -88,14 +88,16 @@ util_dump_tex_filter(unsigned value, boolean shortened);
 const char *
 util_dump_query_type(unsigned value, boolean shortened);
 
+const char *
+util_dump_prim_mode(unsigned value, boolean shortened);
+
 
 /*
  * p_state.h, through a FILE
  */
 
 void
-util_dump_template(FILE *stream,
-                   const struct pipe_resource *templat);
+util_dump_resource(FILE *stream, const struct pipe_resource *state);
 
 void
 util_dump_rasterizer_state(FILE *stream,
@@ -154,10 +156,23 @@ util_dump_surface(FILE *stream,
                   const struct pipe_surface *state);
 
 void
+util_dump_image_view(FILE *stream, const struct pipe_image_view *state);
+
+void
+util_dump_sampler_view(FILE *stream, const struct pipe_sampler_view *state);
+
+void
 util_dump_transfer(FILE *stream,
                    const struct pipe_transfer *state);
 
 void
+util_dump_constant_buffer(FILE *stream,
+                          const struct pipe_constant_buffer *state);
+
+void
+util_dump_index_buffer(FILE *stream, const struct pipe_index_buffer *state);
+
+void
 util_dump_vertex_buffer(FILE *stream,
                         const struct pipe_vertex_buffer *state);
 
@@ -166,6 +181,10 @@ util_dump_vertex_element(FILE *stream,
                          const struct pipe_vertex_element *state);
 
 void
+util_dump_stream_output_target(FILE *stream,
+                               const struct pipe_stream_output_target *state);
+
+void
 util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state);
 
 void
diff --git a/src/gallium/auxiliary/util/u_dump_defines.c b/src/gallium/auxiliary/util/u_dump_defines.c
index 03fd15d0c44..3ddc9554b50 100644
--- a/src/gallium/auxiliary/util/u_dump_defines.c
+++ b/src/gallium/auxiliary/util/u_dump_defines.c
@@ -392,3 +392,44 @@ util_dump_query_type_short_names[] = {
 };
 
 DEFINE_UTIL_DUMP_CONTINUOUS(query_type)
+
+
+static const char *
+util_dump_prim_mode_names[] = {
+   "PIPE_PRIM_POINTS",
+   "PIPE_PRIM_LINES",
+   "PIPE_PRIM_LINE_LOOP",
+   "PIPE_PRIM_LINE_STRIP",
+   "PIPE_PRIM_TRIANGLES",
+   "PIPE_PRIM_TRIANGLE_STRIP",
+   "PIPE_PRIM_TRIANGLE_FAN",
+   "PIPE_PRIM_QUADS",
+   "PIPE_PRIM_QUAD_STRIP",
+   "PIPE_PRIM_POLYGON",
+   "PIPE_PRIM_LINES_ADJACENCY",
+   "PIPE_PRIM_LINE_STRIP_ADJACENCY",
+   "PIPE_PRIM_TRIANGLES_ADJACENCY",
+   "PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY",
+   "PIPE_PRIM_PATCHES",
+};
+
+static const char *
+util_dump_prim_mode_short_names[] = {
+   "points",
+   "lines",
+   "line_loop",
+   "line_strip",
+   "triangles",
+   "triangle_strip",
+   "triangle_fan",
+   "quads",
+   "quad_strip",
+   "polygon",
+   "lines_adjacency",
+   "line_strip_adjacency",
+   "triangles_adjacency",
+   "triangle_strip_adjacency",
+   "patches",
+};
+
+DEFINE_UTIL_DUMP_CONTINUOUS(prim_mode)
diff --git a/src/gallium/auxiliary/util/u_dump_state.c b/src/gallium/auxiliary/util/u_dump_state.c
index 7f620b50cf0..441d16236b5 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -39,7 +39,7 @@
  * Dump primitives
  */
 
-static INLINE void
+static inline void
 util_stream_writef(FILE *stream, const char *format, ...)
 {
    static char buf[1024];
@@ -247,6 +247,42 @@ util_dump_enum_func(FILE *stream, unsigned value)
    util_dump_enum(stream, util_dump_func(value, TRUE));
 }
 
+static void
+util_dump_enum_prim_mode(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_prim_mode(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_target(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_target(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_filter(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_filter(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_mipfilter(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_mipfilter(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_wrap(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_wrap(value, TRUE));
+}
+
+static void
+util_dump_enum_stencil_op(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_stencil_op(value, TRUE));
+}
+
 
 /*
  * Public functions
@@ -254,38 +290,28 @@ util_dump_enum_func(FILE *stream, unsigned value)
 
 
 void
-util_dump_template(FILE *stream, const struct pipe_resource *templat)
+util_dump_resource(FILE *stream, const struct pipe_resource *state)
 {
-   if(!templat) {
+   if (!state) {
       util_dump_null(stream);
       return;
    }
 
    util_dump_struct_begin(stream, "pipe_resource");
 
-   util_dump_member(stream, int, templat, target);
-   util_dump_member(stream, format, templat, format);
-
-   util_dump_member_begin(stream, "width");
-   util_dump_uint(stream, templat->width0);
-   util_dump_member_end(stream);
-
-   util_dump_member_begin(stream, "height");
-   util_dump_uint(stream, templat->height0);
-   util_dump_member_end(stream);
-
-   util_dump_member_begin(stream, "depth");
-   util_dump_uint(stream, templat->depth0);
-   util_dump_member_end(stream);
+   util_dump_member(stream, enum_tex_target, state, target);
+   util_dump_member(stream, format, state, format);
 
-   util_dump_member_begin(stream, "array_size");
-   util_dump_uint(stream, templat->array_size);
-   util_dump_member_end(stream);
+   util_dump_member(stream, uint, state, width0);
+   util_dump_member(stream, uint, state, height0);
+   util_dump_member(stream, uint, state, depth0);
+   util_dump_member(stream, uint, state, array_size);
 
-   util_dump_member(stream, uint, templat, last_level);
-   util_dump_member(stream, uint, templat, usage);
-   util_dump_member(stream, uint, templat, bind);
-   util_dump_member(stream, uint, templat, flags);
+   util_dump_member(stream, uint, state, last_level);
+   util_dump_member(stream, uint, state, nr_samples);
+   util_dump_member(stream, uint, state, usage);
+   util_dump_member(stream, uint, state, bind);
+   util_dump_member(stream, uint, state, flags);
 
    util_dump_struct_end(stream);
 }
@@ -319,6 +345,7 @@ util_dump_rasterizer_state(FILE *stream, const struct pipe_rasterizer_state *sta
    util_dump_member(stream, uint, state, sprite_coord_enable);
    util_dump_member(stream, bool, state, sprite_coord_mode);
    util_dump_member(stream, bool, state, point_quad_rasterization);
+   util_dump_member(stream, bool, state, point_tri_clip);
    util_dump_member(stream, bool, state, point_size_per_vertex);
    util_dump_member(stream, bool, state, multisample);
    util_dump_member(stream, bool, state, line_smooth);
@@ -331,6 +358,7 @@ util_dump_rasterizer_state(FILE *stream, const struct pipe_rasterizer_state *sta
    util_dump_member(stream, bool, state, bottom_edge_rule);
    util_dump_member(stream, bool, state, rasterizer_discard);
    util_dump_member(stream, bool, state, depth_clip);
+   util_dump_member(stream, bool, state, clip_halfz);
    util_dump_member(stream, uint, state, clip_plane_enable);
 
    util_dump_member(stream, float, state, line_width);
@@ -426,7 +454,6 @@ util_dump_clip_state(FILE *stream, const struct pipe_clip_state *state)
 void
 util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state)
 {
-   char str[8192];
    unsigned i;
 
    if(!state) {
@@ -434,33 +461,35 @@ util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state)
       return;
    }
 
-   tgsi_dump_str(state->tokens, 0, str, sizeof(str));
-
    util_dump_struct_begin(stream, "pipe_shader_state");
 
    util_dump_member_begin(stream, "tokens");
-   util_dump_string(stream, str);
+   fprintf(stream, "\"\n");
+   tgsi_dump_to_file(state->tokens, 0, stream);
+   fprintf(stream, "\"");
    util_dump_member_end(stream);
 
-   util_dump_member_begin(stream, "stream_output");
-   util_dump_struct_begin(stream, "pipe_stream_output_info");
-   util_dump_member(stream, uint, &state->stream_output, num_outputs);
-   util_dump_array(stream, uint, state->stream_output.stride,
-                   Elements(state->stream_output.stride));
-   util_dump_array_begin(stream);
-   for(i = 0; i < state->stream_output.num_outputs; ++i) {
-      util_dump_elem_begin(stream);
-      util_dump_struct_begin(stream, ""); /* anonymous */
-      util_dump_member(stream, uint, &state->stream_output.output[i], register_index);
-      util_dump_member(stream, uint, &state->stream_output.output[i], start_component);
-      util_dump_member(stream, uint, &state->stream_output.output[i], num_components);
-      util_dump_member(stream, uint, &state->stream_output.output[i], output_buffer);
+   if (state->stream_output.num_outputs) {
+      util_dump_member_begin(stream, "stream_output");
+      util_dump_struct_begin(stream, "pipe_stream_output_info");
+      util_dump_member(stream, uint, &state->stream_output, num_outputs);
+      util_dump_array(stream, uint, state->stream_output.stride,
+                      Elements(state->stream_output.stride));
+      util_dump_array_begin(stream);
+      for(i = 0; i < state->stream_output.num_outputs; ++i) {
+         util_dump_elem_begin(stream);
+         util_dump_struct_begin(stream, ""); /* anonymous */
+         util_dump_member(stream, uint, &state->stream_output.output[i], register_index);
+         util_dump_member(stream, uint, &state->stream_output.output[i], start_component);
+         util_dump_member(stream, uint, &state->stream_output.output[i], num_components);
+         util_dump_member(stream, uint, &state->stream_output.output[i], output_buffer);
+         util_dump_struct_end(stream);
+         util_dump_elem_end(stream);
+      }
+      util_dump_array_end(stream);
       util_dump_struct_end(stream);
-      util_dump_elem_end(stream);
+      util_dump_member_end(stream);
    }
-   util_dump_array_end(stream);
-   util_dump_struct_end(stream);
-   util_dump_member_end(stream);
 
    util_dump_struct_end(stream);
 }
@@ -496,9 +525,12 @@ util_dump_depth_stencil_alpha_state(FILE *stream, const struct pipe_depth_stenci
       util_dump_member(stream, bool, &state->stencil[i], enabled);
       if (state->stencil[i].enabled) {
          util_dump_member(stream, enum_func, &state->stencil[i], func);
-         util_dump_member(stream, uint, &state->stencil[i], fail_op);
-         util_dump_member(stream, uint, &state->stencil[i], zpass_op);
-         util_dump_member(stream, uint, &state->stencil[i], zfail_op);
+         util_dump_member(stream, enum_stencil_op,
+                          &state->stencil[i], fail_op);
+         util_dump_member(stream, enum_stencil_op,
+                          &state->stencil[i], zpass_op);
+         util_dump_member(stream, enum_stencil_op,
+                          &state->stencil[i], zfail_op);
          util_dump_member(stream, uint, &state->stencil[i], valuemask);
          util_dump_member(stream, uint, &state->stencil[i], writemask);
       }
@@ -555,6 +587,8 @@ util_dump_blend_state(FILE *stream, const struct pipe_blend_state *state)
    util_dump_struct_begin(stream, "pipe_blend_state");
 
    util_dump_member(stream, bool, state, dither);
+   util_dump_member(stream, bool, state, alpha_to_coverage);
+   util_dump_member(stream, bool, state, alpha_to_one);
 
    util_dump_member(stream, bool, state, logicop_enable);
    if (state->logicop_enable) {
@@ -629,16 +663,17 @@ util_dump_sampler_state(FILE *stream, const struct pipe_sampler_state *state)
 
    util_dump_struct_begin(stream, "pipe_sampler_state");
 
-   util_dump_member(stream, uint, state, wrap_s);
-   util_dump_member(stream, uint, state, wrap_t);
-   util_dump_member(stream, uint, state, wrap_r);
-   util_dump_member(stream, uint, state, min_img_filter);
-   util_dump_member(stream, uint, state, min_mip_filter);
-   util_dump_member(stream, uint, state, mag_img_filter);
+   util_dump_member(stream, enum_tex_wrap, state, wrap_s);
+   util_dump_member(stream, enum_tex_wrap, state, wrap_t);
+   util_dump_member(stream, enum_tex_wrap, state, wrap_r);
+   util_dump_member(stream, enum_tex_filter, state, min_img_filter);
+   util_dump_member(stream, enum_tex_mipfilter, state, min_mip_filter);
+   util_dump_member(stream, enum_tex_filter, state, mag_img_filter);
    util_dump_member(stream, uint, state, compare_mode);
    util_dump_member(stream, enum_func, state, compare_func);
    util_dump_member(stream, bool, state, normalized_coords);
    util_dump_member(stream, uint, state, max_anisotropy);
+   util_dump_member(stream, bool, state, seamless_cube_map);
    util_dump_member(stream, float, state, lod_bias);
    util_dump_member(stream, float, state, min_lod);
    util_dump_member(stream, float, state, max_lod);
@@ -672,6 +707,67 @@ util_dump_surface(FILE *stream, const struct pipe_surface *state)
 
 
 void
+util_dump_image_view(FILE *stream, const struct pipe_image_view *state)
+{
+   if (!state) {
+      util_dump_null(stream);
+      return;
+   }
+
+   util_dump_struct_begin(stream, "pipe_image_view");
+
+   util_dump_member(stream, ptr, state, resource);
+   util_dump_member(stream, format, state, format);
+   /* A NULL resource must not be dereferenced; it takes the texture path. */
+   if (state->resource && state->resource->target == PIPE_BUFFER) {
+      util_dump_member(stream, uint, state, u.buf.first_element);
+      util_dump_member(stream, uint, state, u.buf.last_element);
+   }
+   else {
+      util_dump_member(stream, uint, state, u.tex.first_layer);
+      util_dump_member(stream, uint, state, u.tex.last_layer);
+      util_dump_member(stream, uint, state, u.tex.level);
+   }
+
+   util_dump_struct_end(stream);
+}
+
+/* Dump a sampler view; a NULL view is handed to util_dump_null(). */
+void
+util_dump_sampler_view(FILE *stream, const struct pipe_sampler_view *state)
+{
+   if (!state) {
+      util_dump_null(stream);
+      return;
+   }
+
+   util_dump_struct_begin(stream, "pipe_sampler_view");
+
+   util_dump_member(stream, enum_tex_target, state, target);
+   util_dump_member(stream, format, state, format);
+   util_dump_member(stream, ptr, state, texture);
+   /* Buffer views carry an element range, texture views layers + levels. */
+   if (state->target == PIPE_BUFFER) {
+      util_dump_member(stream, uint, state, u.buf.first_element);
+      util_dump_member(stream, uint, state, u.buf.last_element);
+   }
+   else {
+      util_dump_member(stream, uint, state, u.tex.first_layer);
+      util_dump_member(stream, uint, state, u.tex.last_layer);
+      util_dump_member(stream, uint, state, u.tex.first_level);
+      util_dump_member(stream, uint, state, u.tex.last_level);
+   }
+
+   util_dump_member(stream, uint, state, swizzle_r);
+   util_dump_member(stream, uint, state, swizzle_g);
+   util_dump_member(stream, uint, state, swizzle_b);
+   util_dump_member(stream, uint, state, swizzle_a);
+
+   util_dump_struct_end(stream);
+}
+
+
+void
 util_dump_transfer(FILE *stream, const struct pipe_transfer *state)
 {
    if(!state) {
@@ -695,6 +791,45 @@ util_dump_transfer(FILE *stream, const struct pipe_transfer *state)
 
 
 void
+util_dump_constant_buffer(FILE *stream,
+                          const struct pipe_constant_buffer *state)
+{
+   if (!state) {
+      util_dump_null(stream);
+      return;
+   }
+   /* NULL was handled above; dump the binding as one struct record. */
+   util_dump_struct_begin(stream, "pipe_constant_buffer");
+   /* buffer/user_buffer are dumped as pointers, offset and size as uint. */
+   util_dump_member(stream, ptr, state, buffer);
+   util_dump_member(stream, uint, state, buffer_offset);
+   util_dump_member(stream, uint, state, buffer_size);
+   util_dump_member(stream, ptr, state, user_buffer);
+
+   util_dump_struct_end(stream);
+}
+
+/* Dump an index buffer binding; NULL is handed to util_dump_null(). */
+void
+util_dump_index_buffer(FILE *stream, const struct pipe_index_buffer *state)
+{
+   if (!state) {
+      util_dump_null(stream);
+      return;
+   }
+
+   util_dump_struct_begin(stream, "pipe_index_buffer");
+   /* Scalars are dumped as uint, buffer and user_buffer as pointers. */
+   util_dump_member(stream, uint, state, index_size);
+   util_dump_member(stream, uint, state, offset);
+   util_dump_member(stream, ptr, state, buffer);
+   util_dump_member(stream, ptr, state, user_buffer);
+
+   util_dump_struct_end(stream);
+}
+
+
+void
 util_dump_vertex_buffer(FILE *stream, const struct pipe_vertex_buffer *state)
 {
    if(!state) {
@@ -707,6 +842,7 @@ util_dump_vertex_buffer(FILE *stream, const struct pipe_vertex_buffer *state)
    util_dump_member(stream, uint, state, stride);
    util_dump_member(stream, uint, state, buffer_offset);
    util_dump_member(stream, ptr, state, buffer);
+   util_dump_member(stream, ptr, state, user_buffer);
 
    util_dump_struct_end(stream);
 }
@@ -732,6 +868,25 @@ util_dump_vertex_element(FILE *stream, const struct pipe_vertex_element *state)
 
 
 void
+util_dump_stream_output_target(FILE *stream,
+                               const struct pipe_stream_output_target *state)
+{
+   if (!state) {
+      util_dump_null(stream);
+      return;
+   }
+   /* NULL was handled above; dump the target as one struct record. */
+   util_dump_struct_begin(stream, "pipe_stream_output_target");
+   /* buffer is dumped as a pointer, offset and size as uint. */
+   util_dump_member(stream, ptr, state, buffer);
+   util_dump_member(stream, uint, state, buffer_offset);
+   util_dump_member(stream, uint, state, buffer_size);
+
+   util_dump_struct_end(stream);
+}
+
+
+void
 util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state)
 {
    if(!state) {
@@ -743,7 +898,7 @@ util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state)
 
    util_dump_member(stream, bool, state, indexed);
 
-   util_dump_member(stream, uint, state, mode);
+   util_dump_member(stream, enum_prim_mode, state, mode);
    util_dump_member(stream, uint, state, start);
    util_dump_member(stream, uint, state, count);
 
@@ -830,12 +985,14 @@ void util_dump_blit_info(FILE *stream, const struct pipe_blit_info *info)
    util_dump_member_begin(stream, "mask");
    util_dump_string(stream, mask);
    util_dump_member_end(stream);
-   util_dump_member(stream, uint, info, filter);
+   util_dump_member(stream, enum_tex_filter, info, filter);
 
    util_dump_member(stream, bool, info, scissor_enable);
    util_dump_member_begin(stream, "scissor");
    util_dump_scissor_state(stream, &info->scissor);
    util_dump_member_end(stream);
 
+   util_dump_member(stream, bool, info, render_condition_enable);
+
    util_dump_struct_end(stream);
 }
diff --git a/src/gallium/auxiliary/util/u_dynarray.h b/src/gallium/auxiliary/util/u_dynarray.h
index 980cadf22d1..7b7a093d824 100644
--- a/src/gallium/auxiliary/util/u_dynarray.h
+++ b/src/gallium/auxiliary/util/u_dynarray.h
@@ -43,13 +43,13 @@ struct util_dynarray
    unsigned capacity;
 };
 
-static INLINE void
+static inline void
 util_dynarray_init(struct util_dynarray *buf)
 {
    memset(buf, 0, sizeof(*buf));
 }
 
-static INLINE void
+static inline void
 util_dynarray_fini(struct util_dynarray *buf)
 {
    if(buf->data)
@@ -60,7 +60,7 @@ util_dynarray_fini(struct util_dynarray *buf)
 }
 
 /* use util_dynarray_trim to reduce the allocated storage */
-static INLINE void *
+static inline void *
 util_dynarray_resize(struct util_dynarray *buf, unsigned newsize)
 {
    char *p;
@@ -78,13 +78,13 @@ util_dynarray_resize(struct util_dynarray *buf, unsigned newsize)
    return p;
 }
 
-static INLINE void *
+static inline void *
 util_dynarray_grow(struct util_dynarray *buf, int diff)
 {
    return util_dynarray_resize(buf, buf->size + diff);
 }
 
-static INLINE void
+static inline void
 util_dynarray_trim(struct util_dynarray *buf)
 {
    if (buf->size != buf->capacity) {
diff --git a/src/gallium/auxiliary/util/u_fifo.h b/src/gallium/auxiliary/util/u_fifo.h
index 9e007de1ada..a7aad6179d9 100644
--- a/src/gallium/auxiliary/util/u_fifo.h
+++ b/src/gallium/auxiliary/util/u_fifo.h
@@ -36,7 +36,7 @@ struct util_fifo
    size_t size;
 };
 
-static INLINE struct util_fifo *
+static inline struct util_fifo *
 u_fifo_create(size_t size)
 {
    struct util_fifo *fifo;
@@ -50,7 +50,7 @@ u_fifo_create(size_t size)
    return fifo;
 }
 
-static INLINE boolean
+static inline boolean
 u_fifo_add(struct util_fifo *fifo, void *ptr)
 {
    void **array = (void**)&fifo[1];
@@ -67,7 +67,7 @@ u_fifo_add(struct util_fifo *fifo, void *ptr)
    return TRUE;
 }
 
-static INLINE boolean
+static inline boolean
 u_fifo_pop(struct util_fifo *fifo, void **ptr)
 {
    void **array = (void**)&fifo[1];
@@ -85,7 +85,7 @@ u_fifo_pop(struct util_fifo *fifo, void **ptr)
    return TRUE;
 }
 
-static INLINE void
+static inline void
 u_fifo_destroy(struct util_fifo *fifo)
 {
    FREE(fifo);
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 621574c9673..42b39ff04fd 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -425,7 +425,7 @@ util_format_description(enum pipe_format format);
  * Format query functions.
  */
 
-static INLINE const char *
+static inline const char *
 util_format_name(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -438,7 +438,7 @@ util_format_name(enum pipe_format format)
    return desc->name;
 }
 
-static INLINE const char *
+static inline const char *
 util_format_short_name(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -454,7 +454,7 @@ util_format_short_name(enum pipe_format format)
 /**
  * Whether this format is plain, see UTIL_FORMAT_LAYOUT_PLAIN for more info.
  */
-static INLINE boolean
+static inline boolean
 util_format_is_plain(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -466,7 +466,7 @@ util_format_is_plain(enum pipe_format format)
    return desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ? TRUE : FALSE;
 }
 
-static INLINE boolean 
+static inline boolean 
 util_format_is_compressed(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -488,7 +488,7 @@ util_format_is_compressed(enum pipe_format format)
    }
 }
 
-static INLINE boolean 
+static inline boolean 
 util_format_is_s3tc(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -501,28 +501,28 @@ util_format_is_s3tc(enum pipe_format format)
    return desc->layout == UTIL_FORMAT_LAYOUT_S3TC ? TRUE : FALSE;
 }
 
-static INLINE boolean 
+static inline boolean 
 util_format_is_srgb(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
    return desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB;
 }
 
-static INLINE boolean
+static inline boolean
 util_format_has_depth(const struct util_format_description *desc)
 {
    return desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
           desc->swizzle[0] != UTIL_FORMAT_SWIZZLE_NONE;
 }
 
-static INLINE boolean
+static inline boolean
 util_format_has_stencil(const struct util_format_description *desc)
 {
    return desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
           desc->swizzle[1] != UTIL_FORMAT_SWIZZLE_NONE;
 }
 
-static INLINE boolean
+static inline boolean
 util_format_is_depth_or_stencil(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -536,7 +536,7 @@ util_format_is_depth_or_stencil(enum pipe_format format)
           util_format_has_stencil(desc);
 }
 
-static INLINE boolean
+static inline boolean
 util_format_is_depth_and_stencil(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -554,7 +554,7 @@ util_format_is_depth_and_stencil(enum pipe_format format)
 /**
  * Calculates the depth format type based upon the incoming format description.
  */
-static INLINE unsigned
+static inline unsigned
 util_get_depth_format_type(const struct util_format_description *desc)
 {
    unsigned depth_channel = desc->swizzle[0];
@@ -581,7 +581,7 @@ util_get_depth_format_mrd(const struct util_format_description *desc);
  * Return whether this is an RGBA, Z, S, or combined ZS format.
  * Useful for initializing pipe_blit_info::mask.
  */
-static INLINE unsigned
+static inline unsigned
 util_format_get_mask(enum pipe_format format)
 {
    const struct util_format_description *desc =
@@ -611,7 +611,7 @@ util_format_get_mask(enum pipe_format format)
  *
  * That is, the channels whose values are preserved.
  */
-static INLINE unsigned
+static inline unsigned
 util_format_colormask(const struct util_format_description *desc)
 {
    unsigned colormask;
@@ -643,7 +643,7 @@ util_format_colormask(const struct util_format_description *desc)
  * @param desc       a format description to check colormask with
  * @param colormask  a bit mask for channels, matches format of PIPE_MASK_RGBA
  */
-static INLINE boolean
+static inline boolean
 util_format_colormask_full(const struct util_format_description *desc, unsigned colormask)
 {
    return (~colormask & util_format_colormask(desc)) == 0;
@@ -709,7 +709,7 @@ util_format_is_supported(enum pipe_format format, unsigned bind);
  *
  *   PIPE_FORMAT_?8?8?8?8_UNORM
  */
-static INLINE boolean
+static inline boolean
 util_format_is_rgba8_variant(const struct util_format_description *desc)
 {
    unsigned chan;
@@ -737,7 +737,7 @@ util_format_is_rgba8_variant(const struct util_format_description *desc)
 /**
  * Return total bits needed for the pixel format per block.
  */
-static INLINE uint
+static inline uint
 util_format_get_blocksizebits(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -753,7 +753,7 @@ util_format_get_blocksizebits(enum pipe_format format)
 /**
  * Return bytes per block (not pixel) for the given format.
  */
-static INLINE uint
+static inline uint
 util_format_get_blocksize(enum pipe_format format)
 {
    uint bits = util_format_get_blocksizebits(format);
@@ -768,7 +768,7 @@ util_format_get_blocksize(enum pipe_format format)
    return bytes;
 }
 
-static INLINE uint
+static inline uint
 util_format_get_blockwidth(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -781,7 +781,7 @@ util_format_get_blockwidth(enum pipe_format format)
    return desc->block.width;
 }
 
-static INLINE uint
+static inline uint
 util_format_get_blockheight(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -794,7 +794,7 @@ util_format_get_blockheight(enum pipe_format format)
    return desc->block.height;
 }
 
-static INLINE unsigned
+static inline unsigned
 util_format_get_nblocksx(enum pipe_format format,
                          unsigned x)
 {
@@ -802,7 +802,7 @@ util_format_get_nblocksx(enum pipe_format format,
    return (x + blockwidth - 1) / blockwidth;
 }
 
-static INLINE unsigned
+static inline unsigned
 util_format_get_nblocksy(enum pipe_format format,
                          unsigned y)
 {
@@ -810,7 +810,7 @@ util_format_get_nblocksy(enum pipe_format format,
    return (y + blockheight - 1) / blockheight;
 }
 
-static INLINE unsigned
+static inline unsigned
 util_format_get_nblocks(enum pipe_format format,
                         unsigned width,
                         unsigned height)
@@ -818,14 +818,14 @@ util_format_get_nblocks(enum pipe_format format,
    return util_format_get_nblocksx(format, width) * util_format_get_nblocksy(format, height);
 }
 
-static INLINE size_t
+static inline size_t
 util_format_get_stride(enum pipe_format format,
                        unsigned width)
 {
    return util_format_get_nblocksx(format, width) * util_format_get_blocksize(format);
 }
 
-static INLINE size_t
+static inline size_t
 util_format_get_2d_size(enum pipe_format format,
                         size_t stride,
                         unsigned height)
@@ -833,7 +833,7 @@ util_format_get_2d_size(enum pipe_format format,
    return util_format_get_nblocksy(format, height) * stride;
 }
 
-static INLINE uint
+static inline uint
 util_format_get_component_bits(enum pipe_format format,
                                enum util_format_colorspace colorspace,
                                uint component)
@@ -880,7 +880,7 @@ util_format_get_component_bits(enum pipe_format format,
  * Given a linear RGB colorspace format, return the corresponding SRGB
  * format, or PIPE_FORMAT_NONE if none.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 util_format_srgb(enum pipe_format format)
 {
    if (util_format_is_srgb(format))
@@ -930,7 +930,7 @@ util_format_srgb(enum pipe_format format)
  * Given an sRGB format, return the corresponding linear colorspace format.
  * For non sRGB formats, return the format unchanged.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 util_format_linear(enum pipe_format format)
 {
    switch (format) {
@@ -977,7 +977,7 @@ util_format_linear(enum pipe_format format)
  * Given a depth-stencil format, return the corresponding stencil-only format.
  * For stencil-only formats, return the format unchanged.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 util_format_stencil_only(enum pipe_format format)
 {
    switch (format) {
@@ -1006,7 +1006,7 @@ util_format_stencil_only(enum pipe_format format)
  * Converts PIPE_FORMAT_*I* to PIPE_FORMAT_*R*.
  * This is identity for non-intensity formats.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 util_format_intensity_to_red(enum pipe_format format)
 {
    switch (format) {
@@ -1044,7 +1044,7 @@ util_format_intensity_to_red(enum pipe_format format)
  * Converts PIPE_FORMAT_*L* to PIPE_FORMAT_*R*.
  * This is identity for non-luminance formats.
  */
-static INLINE enum pipe_format
+static inline enum pipe_format
 util_format_luminance_to_red(enum pipe_format format)
 {
    switch (format) {
@@ -1122,7 +1122,7 @@ util_format_luminance_to_red(enum pipe_format format)
  * Return the number of components stored.
  * Formats with block size != 1x1 will always have 1 component (the block).
  */
-static INLINE unsigned
+static inline unsigned
 util_format_get_nr_components(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
@@ -1133,7 +1133,7 @@ util_format_get_nr_components(enum pipe_format format)
  * Return the index of the first non-void channel
  * -1 if no non-void channels
  */
-static INLINE int
+static inline int
 util_format_get_first_non_void_channel(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
diff --git a/src/gallium/auxiliary/util/u_format_pack.py b/src/gallium/auxiliary/util/u_format_pack.py
index d5138cc0577..fb42de723c4 100644
--- a/src/gallium/auxiliary/util/u_format_pack.py
+++ b/src/gallium/auxiliary/util/u_format_pack.py
@@ -616,7 +616,7 @@ def generate_format_unpack(format, dst_channel, dst_native_type, dst_suffix):
 
     name = format.short_name()
 
-    print 'static INLINE void'
+    print 'static inline void'
     print 'util_format_%s_unpack_%s(%s *dst_row, unsigned dst_stride, const uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, dst_suffix, dst_native_type)
     print '{'
 
@@ -645,7 +645,7 @@ def generate_format_pack(format, src_channel, src_native_type, src_suffix):
 
     name = format.short_name()
 
-    print 'static INLINE void'
+    print 'static inline void'
     print 'util_format_%s_pack_%s(uint8_t *dst_row, unsigned dst_stride, const %s *src_row, unsigned src_stride, unsigned width, unsigned height)' % (name, src_suffix, src_native_type)
     print '{'
     
@@ -674,7 +674,7 @@ def generate_format_fetch(format, dst_channel, dst_native_type, dst_suffix):
 
     name = format.short_name()
 
-    print 'static INLINE void'
+    print 'static inline void'
     print 'util_format_%s_fetch_%s(%s *dst, const uint8_t *src, unsigned i, unsigned j)' % (name, dst_suffix, dst_native_type)
     print '{'
 
diff --git a/src/gallium/auxiliary/util/u_format_r11g11b10f.h b/src/gallium/auxiliary/util/u_format_r11g11b10f.h
index 57516c39c6e..218822b16e6 100644
--- a/src/gallium/auxiliary/util/u_format_r11g11b10f.h
+++ b/src/gallium/auxiliary/util/u_format_r11g11b10f.h
@@ -45,7 +45,7 @@
 
 #define F32_INFINITY         0x7f800000
 
-static INLINE unsigned f32_to_uf11(float val)
+static inline unsigned f32_to_uf11(float val)
 {
    union {
       float f;
@@ -94,7 +94,7 @@ static INLINE unsigned f32_to_uf11(float val)
    return uf11;
 }
 
-static INLINE float uf11_to_f32(uint16_t val)
+static inline float uf11_to_f32(uint16_t val)
 {
    union {
       float f;
@@ -131,7 +131,7 @@ static INLINE float uf11_to_f32(uint16_t val)
    return f32.f;
 }
 
-static INLINE unsigned f32_to_uf10(float val)
+static inline unsigned f32_to_uf10(float val)
 {
    union {
       float f;
@@ -180,7 +180,7 @@ static INLINE unsigned f32_to_uf10(float val)
    return uf10;
 }
 
-static INLINE float uf10_to_f32(uint16_t val)
+static inline float uf10_to_f32(uint16_t val)
 {
    union {
       float f;
@@ -217,14 +217,14 @@ static INLINE float uf10_to_f32(uint16_t val)
    return f32.f;
 }
 
-static INLINE unsigned float3_to_r11g11b10f(const float rgb[3])
+static inline unsigned float3_to_r11g11b10f(const float rgb[3])
 {
    return ( f32_to_uf11(rgb[0]) & 0x7ff) |
           ((f32_to_uf11(rgb[1]) & 0x7ff) << 11) |
           ((f32_to_uf10(rgb[2]) & 0x3ff) << 22);
 }
 
-static INLINE void r11g11b10f_to_float3(unsigned rgb, float retval[3])
+static inline void r11g11b10f_to_float3(unsigned rgb, float retval[3])
 {
    retval[0] = uf11_to_f32( rgb        & 0x7ff);
    retval[1] = uf11_to_f32((rgb >> 11) & 0x7ff);
diff --git a/src/gallium/auxiliary/util/u_format_rgb9e5.h b/src/gallium/auxiliary/util/u_format_rgb9e5.h
index c2a3f6f3e9d..59fc291e917 100644
--- a/src/gallium/auxiliary/util/u_format_rgb9e5.h
+++ b/src/gallium/auxiliary/util/u_format_rgb9e5.h
@@ -26,9 +26,10 @@
 #ifndef RGB9E5_H
 #define RGB9E5_H
 
-#include <math.h>
 #include <assert.h>
 
+#include "c99_math.h"
+
 #define RGB9E5_EXPONENT_BITS          5
 #define RGB9E5_MANTISSA_BITS          9
 #define RGB9E5_EXP_BIAS               15
@@ -73,9 +74,9 @@ typedef union {
    } field;
 } rgb9e5;
 
-static INLINE float rgb9e5_ClampRange(float x)
+static inline float rgb9e5_ClampRange(float x)
 {
-   if (x > 0.0) {
+   if (x > 0.0f) {
       if (x >= MAX_RGB9E5) {
          return MAX_RGB9E5;
       } else {
@@ -90,7 +91,7 @@ static INLINE float rgb9e5_ClampRange(float x)
 /* Ok, FloorLog2 is not correct for the denorm and zero values, but we
    are going to do a max of this value with the minimum rgb9e5 exponent
    that will hide these problem cases. */
-static INLINE int rgb9e5_FloorLog2(float x)
+static inline int rgb9e5_FloorLog2(float x)
 {
    float754 f;
 
@@ -98,7 +99,7 @@ static INLINE int rgb9e5_FloorLog2(float x)
    return (f.field.biasedexponent - 127);
 }
 
-static INLINE unsigned float3_to_rgb9e5(const float rgb[3])
+static inline unsigned float3_to_rgb9e5(const float rgb[3])
 {
    rgb9e5 retval;
    float maxrgb;
@@ -115,8 +116,8 @@ static INLINE unsigned float3_to_rgb9e5(const float rgb[3])
    exp_shared = MAX2(-RGB9E5_EXP_BIAS-1, rgb9e5_FloorLog2(maxrgb)) + 1 + RGB9E5_EXP_BIAS;
    assert(exp_shared <= RGB9E5_MAX_VALID_BIASED_EXP);
    assert(exp_shared >= 0);
-   /* This pow function could be replaced by a table. */
-   denom = pow(2, exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS);
+   /* This exp2 function could be replaced by a table. */
+   denom = exp2(exp_shared - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS);
 
    maxm = (int) floor(maxrgb / denom + 0.5);
    if (maxm == MAX_RGB9E5_MANTISSA+1) {
@@ -146,7 +147,7 @@ static INLINE unsigned float3_to_rgb9e5(const float rgb[3])
    return retval.raw;
 }
 
-static INLINE void rgb9e5_to_float3(unsigned rgb, float retval[3])
+static inline void rgb9e5_to_float3(unsigned rgb, float retval[3])
 {
    rgb9e5 v;
    int exponent;
@@ -154,7 +155,7 @@ static INLINE void rgb9e5_to_float3(unsigned rgb, float retval[3])
 
    v.raw = rgb;
    exponent = v.field.biasedexponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS;
-   scale = (float) pow(2, exponent);
+   scale = exp2f(exponent);
 
    retval[0] = v.field.r * scale;
    retval[1] = v.field.g * scale;
diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c
index 7e05989e6a1..cd3e165d3f0 100644
--- a/src/gallium/auxiliary/util/u_format_s3tc.c
+++ b/src/gallium/auxiliary/util/u_format_s3tc.c
@@ -235,7 +235,7 @@ util_format_dxt5_rgba_fetch_rgba_float(float *dst, const uint8_t *src, unsigned
  * Block decompression.
  */
 
-static INLINE void
+static inline void
 util_format_dxtn_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
                                         const uint8_t *src_row, unsigned src_stride,
                                         unsigned width, unsigned height,
@@ -312,7 +312,7 @@ util_format_dxt5_rgba_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
                                            16, FALSE);
 }
 
-static INLINE void
+static inline void
 util_format_dxtn_rgb_unpack_rgba_float(float *dst_row, unsigned dst_stride,
                                        const uint8_t *src_row, unsigned src_stride,
                                        unsigned width, unsigned height,
@@ -400,7 +400,7 @@ util_format_dxt5_rgba_unpack_rgba_float(float *dst_row, unsigned dst_stride,
  * Block compression.
  */
 
-static INLINE void
+static inline void
 util_format_dxtn_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
                                   const uint8_t *src, unsigned src_stride,
                                   unsigned width, unsigned height,
@@ -478,7 +478,7 @@ util_format_dxt5_rgba_pack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride,
                                      16, FALSE);
 }
 
-static INLINE void
+static inline void
 util_format_dxtn_pack_rgba_float(uint8_t *dst_row, unsigned dst_stride,
                                  const float *src, unsigned src_stride,
                                  unsigned width, unsigned height,
diff --git a/src/gallium/auxiliary/util/u_format_yuv.h b/src/gallium/auxiliary/util/u_format_yuv.h
index 4ec39812e47..41524d63f3a 100644
--- a/src/gallium/auxiliary/util/u_format_yuv.h
+++ b/src/gallium/auxiliary/util/u_format_yuv.h
@@ -54,7 +54,7 @@
  * precision in the coefficients.
  */
 
-static INLINE void
+static inline void
 util_format_rgb_float_to_yuv(float r, float g, float b,
                              uint8_t *y, uint8_t *u, uint8_t *v)
 {
@@ -74,7 +74,7 @@ util_format_rgb_float_to_yuv(float r, float g, float b,
 }
 
 
-static INLINE void
+static inline void
 util_format_yuv_to_rgb_float(uint8_t y, uint8_t u, uint8_t v,
                              float *r, float *g, float *b)
 {
@@ -92,7 +92,7 @@ util_format_yuv_to_rgb_float(uint8_t y, uint8_t u, uint8_t v,
 }
 
 
-static INLINE void
+static inline void
 util_format_rgb_8unorm_to_yuv(uint8_t r, uint8_t g, uint8_t b,
                 	      uint8_t *y, uint8_t *u, uint8_t *v)
 {
@@ -102,7 +102,7 @@ util_format_rgb_8unorm_to_yuv(uint8_t r, uint8_t g, uint8_t b,
 }
 
 
-static INLINE void
+static inline void
 util_format_yuv_to_rgb_8unorm(uint8_t y, uint8_t u, uint8_t v,
                               uint8_t *r, uint8_t *g, uint8_t *b)
 {
diff --git a/src/gallium/auxiliary/util/u_format_zs.c b/src/gallium/auxiliary/util/u_format_zs.c
index f1ed32f1d5c..69f2f2971f7 100644
--- a/src/gallium/auxiliary/util/u_format_zs.c
+++ b/src/gallium/auxiliary/util/u_format_zs.c
@@ -35,28 +35,28 @@
  * z32_unorm conversion functions
  */
 
-static INLINE uint16_t
+static inline uint16_t
 z32_unorm_to_z16_unorm(uint32_t z)
 {
    /* z * 0xffff / 0xffffffff */
    return z >> 16;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 z16_unorm_to_z32_unorm(uint16_t z)
 {
    /* z * 0xffffffff / 0xffff */
    return (z << 16) | z;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 z32_unorm_to_z24_unorm(uint32_t z)
 {
    /* z * 0xffffff / 0xffffffff */
    return z >> 8;
 }
 
-static INLINE uint32_t
+static inline uint32_t
 z24_unorm_to_z32_unorm(uint32_t z)
 {
    /* z * 0xffffffff / 0xffffff */
@@ -68,42 +68,42 @@ z24_unorm_to_z32_unorm(uint32_t z)
  * z32_float conversion functions
  */
 
-static INLINE uint16_t
+static inline uint16_t
 z32_float_to_z16_unorm(float z)
 {
    const float scale = 0xffff;
    return (uint16_t)(z * scale + 0.5f);
 }
 
-static INLINE float
+static inline float
 z16_unorm_to_z32_float(uint16_t z)
 {
    const float scale = 1.0 / 0xffff;
    return (float)(z * scale);
 }
 
-static INLINE uint32_t
+static inline uint32_t
 z32_float_to_z24_unorm(float z)
 {
    const double scale = 0xffffff;
    return (uint32_t)(z * scale) & 0xffffff;
 }
 
-static INLINE float
+static inline float
 z24_unorm_to_z32_float(uint32_t z)
 {
    const double scale = 1.0 / 0xffffff;
    return (float)(z * scale);
 }
 
-static INLINE uint32_t
+static inline uint32_t
 z32_float_to_z32_unorm(float z)
 {
    const double scale = 0xffffffff;
    return (uint32_t)(z * scale);
 }
 
-static INLINE float
+static inline float
 z32_unorm_to_z32_float(uint32_t z)
 {
    const double scale = 1.0 / 0xffffffff;
diff --git a/src/gallium/auxiliary/util/u_half.h b/src/gallium/auxiliary/util/u_half.h
index d340b9a7aef..d28fae3c77d 100644
--- a/src/gallium/auxiliary/util/u_half.h
+++ b/src/gallium/auxiliary/util/u_half.h
@@ -43,7 +43,7 @@ extern "C" {
  *  https://gist.github.com/2144712
  */
 
-static INLINE uint16_t
+static inline uint16_t
 util_float_to_half(float f)
 {
    uint32_t sign_mask  = 0x80000000;
@@ -96,7 +96,7 @@ util_float_to_half(float f)
    return f16;
 }
 
-static INLINE float
+static inline float
 util_half_to_float(uint16_t f16)
 {
    union fi infnan;
diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c
index 85302f1e194..42c4e44b644 100644
--- a/src/gallium/auxiliary/util/u_handle_table.c
+++ b/src/gallium/auxiliary/util/u_handle_table.c
@@ -96,7 +96,7 @@ handle_table_set_destroy(struct handle_table *ht,
 /**
  * Resize the table if necessary 
  */
-static INLINE int
+static inline int
 handle_table_resize(struct handle_table *ht,
                     unsigned minimum_size)
 {
@@ -126,7 +126,7 @@ handle_table_resize(struct handle_table *ht,
 }
 
 
-static INLINE void
+static inline void
 handle_table_clear(struct handle_table *ht, 
                    unsigned index)
 {
diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c
index 06c8b5c91a5..a505fbc4d83 100644
--- a/src/gallium/auxiliary/util/u_hash_table.c
+++ b/src/gallium/auxiliary/util/u_hash_table.c
@@ -68,7 +68,7 @@ struct util_hash_table_item
 };
 
 
-static INLINE struct util_hash_table_item *
+static inline struct util_hash_table_item *
 util_hash_table_item(struct cso_hash_iter iter)
 {
    return (struct util_hash_table_item *)cso_hash_iter_data(iter);
@@ -98,7 +98,7 @@ util_hash_table_create(unsigned (*hash)(void *key),
 }
 
 
-static INLINE struct cso_hash_iter
+static inline struct cso_hash_iter
 util_hash_table_find_iter(struct util_hash_table *ht,
                           void *key,
                           unsigned key_hash)
@@ -118,7 +118,7 @@ util_hash_table_find_iter(struct util_hash_table *ht,
 }
 
 
-static INLINE struct util_hash_table_item *
+static inline struct util_hash_table_item *
 util_hash_table_find_item(struct util_hash_table *ht,
                           void *key,
                           unsigned key_hash)
diff --git a/src/gallium/auxiliary/util/u_inlines.h b/src/gallium/auxiliary/util/u_inlines.h
index 95401621ec3..bb99a02ce49 100644
--- a/src/gallium/auxiliary/util/u_inlines.h
+++ b/src/gallium/auxiliary/util/u_inlines.h
@@ -51,13 +51,13 @@ extern "C" {
  */
 
 
-static INLINE void
+static inline void
 pipe_reference_init(struct pipe_reference *reference, unsigned count)
 {
    p_atomic_set(&reference->count, count);
 }
 
-static INLINE boolean
+static inline boolean
 pipe_is_referenced(struct pipe_reference *reference)
 {
    return p_atomic_read(&reference->count) != 0;
@@ -69,7 +69,7 @@ pipe_is_referenced(struct pipe_reference *reference)
  * Both 'ptr' and 'reference' may be NULL.
  * \return TRUE if the object's refcount hits zero and should be destroyed.
  */
-static INLINE boolean
+static inline boolean
 pipe_reference_described(struct pipe_reference *ptr, 
                          struct pipe_reference *reference, 
                          debug_reference_descriptor get_desc)
@@ -96,14 +96,14 @@ pipe_reference_described(struct pipe_reference *ptr,
    return destroy;
 }
 
-static INLINE boolean
+static inline boolean
 pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
 {
    return pipe_reference_described(ptr, reference, 
                                    (debug_reference_descriptor)debug_describe_reference);
 }
 
-static INLINE void
+static inline void
 pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
 {
    struct pipe_surface *old_surf = *ptr;
@@ -120,7 +120,7 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
  * of using a deleted context's surface_destroy() method when freeing a surface
  * that's shared by multiple contexts.
  */
-static INLINE void
+static inline void
 pipe_surface_release(struct pipe_context *pipe, struct pipe_surface **ptr)
 {
    if (pipe_reference_described(&(*ptr)->reference, NULL,
@@ -130,7 +130,7 @@ pipe_surface_release(struct pipe_context *pipe, struct pipe_surface **ptr)
 }
 
 
-static INLINE void
+static inline void
 pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
 {
    struct pipe_resource *old_tex = *ptr;
@@ -141,7 +141,7 @@ pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
    *ptr = tex;
 }
 
-static INLINE void
+static inline void
 pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_view *view)
 {
    struct pipe_sampler_view *old_view = *ptr;
@@ -158,7 +158,7 @@ pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_
  * work-around for fixing a dangling context pointer problem when textures
  * are shared by multiple contexts.  XXX fix this someday.
  */
-static INLINE void
+static inline void
 pipe_sampler_view_release(struct pipe_context *ctx,
                           struct pipe_sampler_view **ptr)
 {
@@ -173,8 +173,18 @@ pipe_sampler_view_release(struct pipe_context *ctx,
    *ptr = NULL;
 }
 
+static inline void
+pipe_image_view_reference(struct pipe_image_view **ptr, struct pipe_image_view *view)
+{
+   struct pipe_image_view *old_view = *ptr;
+
+   if (pipe_reference_described(&(*ptr)->reference, &view->reference,
+                                (debug_reference_descriptor)debug_describe_image_view))
+      old_view->context->image_view_destroy(old_view->context, old_view);
+   *ptr = view;
+}
 
-static INLINE void
+static inline void
 pipe_so_target_reference(struct pipe_stream_output_target **ptr,
                          struct pipe_stream_output_target *target)
 {
@@ -186,7 +196,7 @@ pipe_so_target_reference(struct pipe_stream_output_target **ptr,
    *ptr = target;
 }
 
-static INLINE void
+static inline void
 pipe_surface_reset(struct pipe_context *ctx, struct pipe_surface* ps,
                    struct pipe_resource *pt, unsigned level, unsigned layer)
 {
@@ -199,7 +209,7 @@ pipe_surface_reset(struct pipe_context *ctx, struct pipe_surface* ps,
    ps->context = ctx;
 }
 
-static INLINE void
+static inline void
 pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps,
                   struct pipe_resource *pt, unsigned level, unsigned layer)
 {
@@ -209,7 +219,7 @@ pipe_surface_init(struct pipe_context *ctx, struct pipe_surface* ps,
 }
 
 /* Return true if the surfaces are equal. */
-static INLINE boolean
+static inline boolean
 pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2)
 {
    return s1->texture == s2->texture &&
@@ -233,7 +243,7 @@ pipe_surface_equal(struct pipe_surface *s1, struct pipe_surface *s2)
  * \param bind  bitmask of PIPE_BIND_x flags
  * \param usage  bitmask of PIPE_USAGE_x flags
  */
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
 pipe_buffer_create( struct pipe_screen *screen,
 		    unsigned bind,
 		    unsigned usage,
@@ -261,7 +271,7 @@ pipe_buffer_create( struct pipe_screen *screen,
  * \param access  bitmask of PIPE_TRANSFER_x flags
  * \param transfer  returns a transfer object
  */
-static INLINE void *
+static inline void *
 pipe_buffer_map_range(struct pipe_context *pipe,
 		      struct pipe_resource *buffer,
 		      unsigned offset,
@@ -292,7 +302,7 @@ pipe_buffer_map_range(struct pipe_context *pipe,
  * \param access  bitmask of PIPE_TRANSFER_x flags
  * \param transfer  returns a transfer object
  */
-static INLINE void *
+static inline void *
 pipe_buffer_map(struct pipe_context *pipe,
                 struct pipe_resource *buffer,
                 unsigned access,
@@ -302,14 +312,14 @@ pipe_buffer_map(struct pipe_context *pipe,
 }
 
 
-static INLINE void
+static inline void
 pipe_buffer_unmap(struct pipe_context *pipe,
                   struct pipe_transfer *transfer)
 {
    pipe->transfer_unmap(pipe, transfer);
 }
 
-static INLINE void
+static inline void
 pipe_buffer_flush_mapped_range(struct pipe_context *pipe,
                                struct pipe_transfer *transfer,
                                unsigned offset,
@@ -333,7 +343,7 @@ pipe_buffer_flush_mapped_range(struct pipe_context *pipe,
    pipe->transfer_flush_region(pipe, transfer, &box);
 }
 
-static INLINE void
+static inline void
 pipe_buffer_write(struct pipe_context *pipe,
                   struct pipe_resource *buf,
                   unsigned offset,
@@ -367,7 +377,7 @@ pipe_buffer_write(struct pipe_context *pipe,
  * We can avoid GPU/CPU synchronization when writing range that has never
  * been written before.
  */
-static INLINE void
+static inline void
 pipe_buffer_write_nooverlap(struct pipe_context *pipe,
                             struct pipe_resource *buf,
                             unsigned offset, unsigned size,
@@ -393,7 +403,7 @@ pipe_buffer_write_nooverlap(struct pipe_context *pipe,
  * \param bind  bitmask of PIPE_BIND_x flags
  * \param usage  bitmask of PIPE_USAGE_x flags
  */
-static INLINE struct pipe_resource *
+static inline struct pipe_resource *
 pipe_buffer_create_with_data(struct pipe_context *pipe,
                              unsigned bind,
                              unsigned usage,
@@ -406,7 +416,7 @@ pipe_buffer_create_with_data(struct pipe_context *pipe,
    return res;
 }
 
-static INLINE void
+static inline void
 pipe_buffer_read(struct pipe_context *pipe,
                  struct pipe_resource *buf,
                  unsigned offset,
@@ -433,7 +443,7 @@ pipe_buffer_read(struct pipe_context *pipe,
  * Map a resource for reading/writing.
  * \param access  bitmask of PIPE_TRANSFER_x flags
  */
-static INLINE void *
+static inline void *
 pipe_transfer_map(struct pipe_context *context,
                   struct pipe_resource *resource,
                   unsigned level, unsigned layer,
@@ -456,7 +466,7 @@ pipe_transfer_map(struct pipe_context *context,
  * Map a 3D (texture) resource for reading/writing.
  * \param access  bitmask of PIPE_TRANSFER_x flags
  */
-static INLINE void *
+static inline void *
 pipe_transfer_map_3d(struct pipe_context *context,
                      struct pipe_resource *resource,
                      unsigned level,
@@ -474,14 +484,14 @@ pipe_transfer_map_3d(struct pipe_context *context,
                                 &box, transfer);
 }
 
-static INLINE void
+static inline void
 pipe_transfer_unmap( struct pipe_context *context,
                      struct pipe_transfer *transfer )
 {
    context->transfer_unmap( context, transfer );
 }
 
-static INLINE void
+static inline void
 pipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
                          struct pipe_resource *buf)
 {
@@ -502,7 +512,7 @@ pipe_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
  * Get the polygon offset enable/disable flag for the given polygon fill mode.
  * \param fill_mode  one of PIPE_POLYGON_MODE_POINT/LINE/FILL
  */
-static INLINE boolean
+static inline boolean
 util_get_offset(const struct pipe_rasterizer_state *templ,
                 unsigned fill_mode)
 {
@@ -519,7 +529,7 @@ util_get_offset(const struct pipe_rasterizer_state *templ,
    }
 }
 
-static INLINE float
+static inline float
 util_get_min_point_size(const struct pipe_rasterizer_state *state)
 {
    /* The point size should be clamped to this value at the rasterizer stage.
@@ -529,7 +539,7 @@ util_get_min_point_size(const struct pipe_rasterizer_state *state)
           !state->multisample ? 1.0f : 0.0f;
 }
 
-static INLINE void
+static inline void
 util_query_clear_result(union pipe_query_result *result, unsigned type)
 {
    switch (type) {
@@ -560,7 +570,7 @@ util_query_clear_result(union pipe_query_result *result, unsigned type)
 }
 
 /** Convert PIPE_TEXTURE_x to TGSI_TEXTURE_x */
-static INLINE unsigned
+static inline unsigned
 util_pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target,
                           unsigned nr_samples)
 {
@@ -605,7 +615,7 @@ util_pipe_tex_to_tgsi_tex(enum pipe_texture_target pipe_tex_target,
 }
 
 
-static INLINE void
+static inline void
 util_copy_constant_buffer(struct pipe_constant_buffer *dst,
                           const struct pipe_constant_buffer *src)
 {
@@ -623,7 +633,7 @@ util_copy_constant_buffer(struct pipe_constant_buffer *dst,
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 util_max_layer(const struct pipe_resource *r, unsigned level)
 {
    switch (r->target) {
diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c
index ae14eda3cec..daa2991ced6 100644
--- a/src/gallium/auxiliary/util/u_keymap.c
+++ b/src/gallium/auxiliary/util/u_keymap.c
@@ -71,7 +71,7 @@ default_delete_func(const struct keymap *map,
 }
 
 
-static INLINE struct keymap_item *
+static inline struct keymap_item *
 hash_table_item(struct cso_hash_iter iter)
 {
    return (struct keymap_item *) cso_hash_iter_data(iter);
@@ -143,7 +143,7 @@ util_delete_keymap(struct keymap *map, void *user)
 }
 
 
-static INLINE struct cso_hash_iter
+static inline struct cso_hash_iter
 hash_table_find_iter(const struct keymap *map, const void *key,
                      unsigned key_hash)
 {
@@ -162,7 +162,7 @@ hash_table_find_iter(const struct keymap *map, const void *key,
 }
 
 
-static INLINE struct keymap_item *
+static inline struct keymap_item *
 hash_table_find_item(const struct keymap *map, const void *key,
                      unsigned key_hash)
 {
diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h
index 81ffc9fb27d..87e52a344d4 100644
--- a/src/gallium/auxiliary/util/u_linear.h
+++ b/src/gallium/auxiliary/util/u_linear.h
@@ -89,7 +89,7 @@ void pipe_linear_fill_info(struct pipe_tile_info *t,
 			   unsigned tile_width, unsigned tile_height,
 			   unsigned tiles_x, unsigned tiles_y);
 
-static INLINE boolean pipe_linear_check_tile(const struct pipe_tile_info *t)
+static inline boolean pipe_linear_check_tile(const struct pipe_tile_info *t)
 {
    if (t->tile.size != t->block.size * t->cols * t->rows)
       return FALSE;
diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c
index ae9e9513b04..c58af911be7 100644
--- a/src/gallium/auxiliary/util/u_math.c
+++ b/src/gallium/auxiliary/util/u_math.c
@@ -48,7 +48,7 @@ init_pow2_table(void)
 {
    int i;
    for (i = 0; i < POW2_TABLE_SIZE; i++)
-      pow2_table[i] = (float) pow(2.0, (i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE);
+      pow2_table[i] = exp2f((i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE);
 }
 
 
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 3b4040f0ee2..56bd185f527 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -92,7 +92,7 @@ union di {
 /**
  * Extract the IEEE float32 exponent.
  */
-static INLINE signed
+static inline signed
 util_get_float32_exponent(float x)
 {
    union fi f;
@@ -112,7 +112,7 @@ util_get_float32_exponent(float x)
  * Compute exp2(ipart) with i << ipart
  * Compute exp2(fpart) with lookup table.
  */
-static INLINE float
+static inline float
 util_fast_exp2(float x)
 {
    int32_t ipart;
@@ -143,7 +143,7 @@ util_fast_exp2(float x)
 /**
  * Fast approximation to exp(x).
  */
-static INLINE float
+static inline float
 util_fast_exp(float x)
 {
    const float k = 1.44269f; /* = log2(e) */
@@ -160,7 +160,7 @@ extern float log2_table[LOG2_TABLE_SIZE];
 /**
  * Fast approximation to log2(x).
  */
-static INLINE float
+static inline float
 util_fast_log2(float x)
 {
    union fi num;
@@ -176,7 +176,7 @@ util_fast_log2(float x)
 /**
  * Fast approximation to x^y.
  */
-static INLINE float
+static inline float
 util_fast_pow(float x, float y)
 {
    return util_fast_exp2(util_fast_log2(x) * y);
@@ -184,7 +184,7 @@ util_fast_pow(float x, float y)
 
 /* Note that this counts zero as a power of two.
  */
-static INLINE boolean
+static inline boolean
 util_is_power_of_two( unsigned v )
 {
    return (v & (v-1)) == 0;
@@ -194,7 +194,7 @@ util_is_power_of_two( unsigned v )
 /**
  * Floor(x), returned as int.
  */
-static INLINE int
+static inline int
 util_ifloor(float f)
 {
    int ai, bi;
@@ -211,7 +211,7 @@ util_ifloor(float f)
 /**
  * Round float to nearest int.
  */
-static INLINE int
+static inline int
 util_iround(float f)
 {
 #if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) 
@@ -237,10 +237,10 @@ util_iround(float f)
 /**
  * Approximate floating point comparison
  */
-static INLINE boolean
+static inline boolean
 util_is_approx(float a, float b, float tol)
 {
-   return fabs(b - a) <= tol;
+   return fabsf(b - a) <= tol;
 }
 
 
@@ -256,7 +256,7 @@ util_is_approx(float a, float b, float tol)
 /**
  * Single-float
  */
-static INLINE boolean
+static inline boolean
 util_is_inf_or_nan(float x)
 {
    union fi tmp;
@@ -265,7 +265,7 @@ util_is_inf_or_nan(float x)
 }
 
 
-static INLINE boolean
+static inline boolean
 util_is_nan(float x)
 {
    union fi tmp;
@@ -274,7 +274,7 @@ util_is_nan(float x)
 }
 
 
-static INLINE int
+static inline int
 util_inf_sign(float x)
 {
    union fi tmp;
@@ -290,7 +290,7 @@ util_inf_sign(float x)
 /**
  * Double-float
  */
-static INLINE boolean
+static inline boolean
 util_is_double_inf_or_nan(double x)
 {
    union di tmp;
@@ -299,7 +299,7 @@ util_is_double_inf_or_nan(double x)
 }
 
 
-static INLINE boolean
+static inline boolean
 util_is_double_nan(double x)
 {
    union di tmp;
@@ -308,7 +308,7 @@ util_is_double_nan(double x)
 }
 
 
-static INLINE int
+static inline int
 util_double_inf_sign(double x)
 {
    union di tmp;
@@ -324,21 +324,21 @@ util_double_inf_sign(double x)
 /**
  * Half-float
  */
-static INLINE boolean
+static inline boolean
 util_is_half_inf_or_nan(int16_t x)
 {
    return (x & 0x7c00) == 0x7c00;
 }
 
 
-static INLINE boolean
+static inline boolean
 util_is_half_nan(int16_t x)
 {
    return (x & 0x7fff) > 0x7c00;
 }
 
 
-static INLINE int
+static inline int
 util_half_inf_sign(int16_t x)
 {
    if ((x & 0x7fff) != 0x7c00) {
@@ -359,7 +359,7 @@ util_half_inf_sign(int16_t x)
 #if defined(_MSC_VER) && (_M_IX86 || _M_AMD64 || _M_IA64)
 unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask);
 #pragma intrinsic(_BitScanForward)
-static INLINE
+static inline
 unsigned long ffs( unsigned long u )
 {
    unsigned long i;
@@ -369,7 +369,7 @@ unsigned long ffs( unsigned long u )
       return 0;
 }
 #elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
-static INLINE
+static inline
 unsigned ffs( unsigned u )
 {
    unsigned i;
@@ -409,7 +409,7 @@ unsigned ffs( unsigned u )
  * Find last bit set in a word.  The least significant bit is 1.
  * Return 0 if no bits are set.
  */
-static INLINE unsigned
+static inline unsigned
 util_last_bit(unsigned u)
 {
 #if defined(HAVE___BUILTIN_CLZ)
@@ -428,7 +428,7 @@ util_last_bit(unsigned u)
  * Find last bit set in a word.  The least significant bit is 1.
  * Return 0 if no bits are set.
  */
-static INLINE unsigned
+static inline unsigned
 util_last_bit64(uint64_t u)
 {
 #if defined(HAVE___BUILTIN_CLZLL)
@@ -448,7 +448,7 @@ util_last_bit64(uint64_t u)
  * significant bit is 1.
  * Return 0 if no bits are set.
  */
-static INLINE unsigned
+static inline unsigned
 util_last_bit_signed(int i)
 {
    if (i >= 0)
@@ -465,7 +465,7 @@ util_last_bit_signed(int i)
  * }
  *
  */
-static INLINE int
+static inline int
 u_bit_scan(unsigned *mask)
 {
    int i = ffs(*mask) - 1;
@@ -474,7 +474,7 @@ u_bit_scan(unsigned *mask)
 }
 
 #ifndef _MSC_VER
-static INLINE int
+static inline int
 u_bit_scan64(uint64_t *mask)
 {
    int i = ffsll(*mask) - 1;
@@ -486,7 +486,7 @@ u_bit_scan64(uint64_t *mask)
 /**
  * Return float bits.
  */
-static INLINE unsigned
+static inline unsigned
 fui( float f )
 {
    union fi fi;
@@ -494,7 +494,7 @@ fui( float f )
    return fi.ui;
 }
 
-static INLINE float
+static inline float
 uif(uint32_t ui)
 {
    union fi fi;
@@ -507,7 +507,7 @@ uif(uint32_t ui)
  * Convert ubyte to float in [0, 1].
  * XXX a 256-entry lookup table would be slightly faster.
  */
-static INLINE float
+static inline float
 ubyte_to_float(ubyte ub)
 {
    return (float) ub * (1.0f / 255.0f);
@@ -517,7 +517,7 @@ ubyte_to_float(ubyte ub)
 /**
  * Convert float in [0,1] to ubyte in [0,255] with clamping.
  */
-static INLINE ubyte
+static inline ubyte
 float_to_ubyte(float f)
 {
    union fi tmp;
@@ -535,13 +535,13 @@ float_to_ubyte(float f)
    }
 }
 
-static INLINE float
+static inline float
 byte_to_float_tex(int8_t b)
 {
    return (b == -128) ? -1.0F : b * 1.0F / 127.0F;
 }
 
-static INLINE int8_t
+static inline int8_t
 float_to_byte_tex(float f)
 {
    return (int8_t) (127.0F * f);
@@ -550,7 +550,7 @@ float_to_byte_tex(float f)
 /**
  * Calc log base 2
  */
-static INLINE unsigned
+static inline unsigned
 util_logbase2(unsigned n)
 {
 #if defined(HAVE___BUILTIN_CLZ)
@@ -570,7 +570,7 @@ util_logbase2(unsigned n)
 /**
  * Returns the smallest power of two >= x
  */
-static INLINE unsigned
+static inline unsigned
 util_next_power_of_two(unsigned x)
 {
 #if defined(HAVE___BUILTIN_CLZ)
@@ -602,7 +602,7 @@ util_next_power_of_two(unsigned x)
 /**
  * Return number of bits set in n.
  */
-static INLINE unsigned
+static inline unsigned
 util_bitcount(unsigned n)
 {
 #if defined(HAVE___BUILTIN_POPCOUNT)
@@ -623,7 +623,7 @@ util_bitcount(unsigned n)
 }
 
 
-static INLINE unsigned
+static inline unsigned
 util_bitcount64(uint64_t n)
 {
 #ifdef HAVE___BUILTIN_POPCOUNTLL
@@ -639,7 +639,7 @@ util_bitcount64(uint64_t n)
  * Algorithm taken from:
  * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer
  */
-static INLINE unsigned
+static inline unsigned
 util_bitreverse(unsigned n)
 {
     n = ((n >> 1) & 0x55555555u) | ((n & 0x55555555u) << 1);
@@ -671,7 +671,7 @@ util_bitreverse(unsigned n)
 /**
  * Reverse byte order of a 32 bit word.
  */
-static INLINE uint32_t
+static inline uint32_t
 util_bswap32(uint32_t n)
 {
 #if defined(HAVE___BUILTIN_BSWAP32)
@@ -687,7 +687,7 @@ util_bswap32(uint32_t n)
 /**
  * Reverse byte order of a 64bit word.
  */
-static INLINE uint64_t
+static inline uint64_t
 util_bswap64(uint64_t n)
 {
 #if defined(HAVE___BUILTIN_BSWAP64)
@@ -702,14 +702,14 @@ util_bswap64(uint64_t n)
 /**
  * Reverse byte order of a 16 bit word.
  */
-static INLINE uint16_t
+static inline uint16_t
 util_bswap16(uint16_t n)
 {
    return (n >> 8) |
           (n << 8);
 }
 
-static INLINE void*
+static inline void*
 util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n)
 {
 #ifdef PIPE_ARCH_BIG_ENDIAN
@@ -746,7 +746,7 @@ util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t
 /**
  * Align a value, only works pot alignemnts.
  */
-static INLINE int
+static inline int
 align(int value, int alignment)
 {
    return (value + alignment - 1) & ~(alignment - 1);
@@ -755,7 +755,7 @@ align(int value, int alignment)
 /**
  * Works like align but on npot alignments.
  */
-static INLINE size_t
+static inline size_t
 util_align_npot(size_t value, size_t alignment)
 {
    if (value % alignment)
@@ -763,7 +763,7 @@ util_align_npot(size_t value, size_t alignment)
    return value;
 }
 
-static INLINE unsigned
+static inline unsigned
 u_minify(unsigned value, unsigned levels)
 {
     return MAX2(1, value >> levels);
@@ -796,13 +796,13 @@ do {                                     \
 #endif
 
 
-static INLINE uint32_t
+static inline uint32_t
 util_unsigned_fixed(float value, unsigned frac_bits)
 {
    return value < 0 ? 0 : (uint32_t)(value * (1<<frac_bits));
 }
 
-static INLINE int32_t
+static inline int32_t
 util_signed_fixed(float value, unsigned frac_bits)
 {
    return (int32_t)(value * (1<<frac_bits));
diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h
index 9ff6c7da919..7fe0fe6f053 100644
--- a/src/gallium/auxiliary/util/u_memory.h
+++ b/src/gallium/auxiliary/util/u_memory.h
@@ -67,7 +67,7 @@ extern "C" {
 /**
  * Duplicate a block of memory.
  */
-static INLINE void *
+static inline void *
 mem_dup(const void *src, uint size)
 {
    void *dup = MALLOC(size);
diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c
index 82f83702d1e..2069b56f464 100644
--- a/src/gallium/auxiliary/util/u_mm.c
+++ b/src/gallium/auxiliary/util/u_mm.c
@@ -224,7 +224,7 @@ u_mmFindBlock(struct mem_block *heap, int start)
 }
 
 
-static INLINE int
+static inline int
 Join2Blocks(struct mem_block *p)
 {
    /* XXX there should be some assertions here */
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index e0c9018f8ef..b882502b7ba 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -60,7 +60,7 @@ union util_color {
 /**
  * Pack ubyte R,G,B,A into dest pixel.
  */
-static INLINE void
+static inline void
 util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a,
                    enum pipe_format format, union util_color *uc)
 {
@@ -161,7 +161,7 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a,
 /**
  * Unpack RGBA from a packed pixel, returning values as ubytes in [0,255].
  */
-static INLINE void
+static inline void
 util_unpack_color_ub(enum pipe_format format, union util_color *uc,
                      ubyte *r, ubyte *g, ubyte *b, ubyte *a)
 {
@@ -333,7 +333,7 @@ util_unpack_color_ub(enum pipe_format format, union util_color *uc,
  * This will not work (and might not really be useful with float input)
  * for pure integer formats (which lack the pack_rgba_float function).
  */
-static INLINE void
+static inline void
 util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc)
 {
    ubyte r = 0;
@@ -437,7 +437,7 @@ util_pack_color(const float rgba[4], enum pipe_format format, union util_color *
 /* Integer versions of util_pack_z and util_pack_z_stencil - useful for
  * constructing clear masks.
  */
-static INLINE uint32_t
+static inline uint32_t
 util_pack_mask_z(enum pipe_format format, uint32_t z)
 {
    switch (format) {
@@ -462,7 +462,7 @@ util_pack_mask_z(enum pipe_format format, uint32_t z)
 }
 
 
-static INLINE uint64_t
+static inline uint64_t
 util_pack64_mask_z(enum pipe_format format, uint32_t z)
 {
    switch (format) {
@@ -474,7 +474,7 @@ util_pack64_mask_z(enum pipe_format format, uint32_t z)
 }
 
 
-static INLINE uint32_t
+static inline uint32_t
 util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s)
 {
    uint32_t packed = util_pack_mask_z(format, z);
@@ -497,7 +497,7 @@ util_pack_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s)
 }
 
 
-static INLINE uint64_t
+static inline uint64_t
 util_pack64_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s)
 {
    uint64_t packed;
@@ -516,7 +516,7 @@ util_pack64_mask_z_stencil(enum pipe_format format, uint32_t z, uint8_t s)
 /**
  * Note: it's assumed that z is in [0,1]
  */
-static INLINE uint32_t
+static inline uint32_t
 util_pack_z(enum pipe_format format, double z)
 {
    union fi fui;
@@ -558,7 +558,7 @@ util_pack_z(enum pipe_format format, double z)
 }
 
 
-static INLINE uint64_t
+static inline uint64_t
 util_pack64_z(enum pipe_format format, double z)
 {
    union fi fui;
@@ -580,7 +580,7 @@ util_pack64_z(enum pipe_format format, double z)
  * Pack Z and/or stencil values into a 32-bit value described by format.
  * Note: it's assumed that z is in [0,1] and s in [0,255]
  */
-static INLINE uint32_t
+static inline uint32_t
 util_pack_z_stencil(enum pipe_format format, double z, uint8_t s)
 {
    uint32_t packed = util_pack_z(format, z);
@@ -603,7 +603,7 @@ util_pack_z_stencil(enum pipe_format format, double z, uint8_t s)
 }
 
 
-static INLINE uint64_t
+static inline uint64_t
 util_pack64_z_stencil(enum pipe_format format, double z, uint8_t s)
 {
    uint64_t packed;
@@ -624,7 +624,7 @@ util_pack64_z_stencil(enum pipe_format format, double z, uint8_t s)
 /**
  * Pack 4 ubytes into a 4-byte word
  */
-static INLINE unsigned
+static inline unsigned
 pack_ub4(ubyte b0, ubyte b1, ubyte b2, ubyte b3)
 {
    return ((((unsigned int)b0) << 0) |
@@ -637,7 +637,7 @@ pack_ub4(ubyte b0, ubyte b1, ubyte b2, ubyte b3)
 /**
  * Pack/convert 4 floats into one 4-byte word.
  */
-static INLINE unsigned
+static inline unsigned
 pack_ui32_float4(float a, float b, float c, float d)
 {
    return pack_ub4( float_to_ubyte(a),
diff --git a/src/gallium/auxiliary/util/u_pointer.h b/src/gallium/auxiliary/util/u_pointer.h
index 30c23b79831..4f7a27ca61d 100644
--- a/src/gallium/auxiliary/util/u_pointer.h
+++ b/src/gallium/auxiliary/util/u_pointer.h
@@ -34,7 +34,7 @@
 extern "C" {
 #endif
 
-static INLINE intptr_t
+static inline intptr_t
 pointer_to_intptr( const void *p )
 {
    union {
@@ -45,7 +45,7 @@ pointer_to_intptr( const void *p )
    return pi.i;
 }
 
-static INLINE void *
+static inline void *
 intptr_to_pointer( intptr_t i )
 {
    union {
@@ -56,7 +56,7 @@ intptr_to_pointer( intptr_t i )
    return pi.p;
 }
 
-static INLINE uintptr_t
+static inline uintptr_t
 pointer_to_uintptr( const void *ptr )
 {
    union {
@@ -67,7 +67,7 @@ pointer_to_uintptr( const void *ptr )
    return pu.u;
 }
 
-static INLINE void *
+static inline void *
 uintptr_to_pointer( uintptr_t u )
 {
    union {
@@ -81,7 +81,7 @@ uintptr_to_pointer( uintptr_t u )
 /**
  * Return a pointer aligned to next multiple of N bytes.
  */
-static INLINE void *
+static inline void *
 align_pointer( const void *unaligned, uintptr_t alignment )
 {
    uintptr_t aligned = (pointer_to_uintptr( unaligned ) + alignment - 1) & ~(alignment - 1);
@@ -92,7 +92,7 @@ align_pointer( const void *unaligned, uintptr_t alignment )
 /**
  * Return a pointer aligned to next multiple of 16 bytes.
  */
-static INLINE void *
+static inline void *
 align16( void *unaligned )
 {
    return align_pointer( unaligned, 16 );
@@ -100,7 +100,7 @@ align16( void *unaligned )
 
 typedef void (*func_pointer)(void);
 
-static INLINE func_pointer
+static inline func_pointer
 pointer_to_func( void *p )
 {
    union {
@@ -111,7 +111,7 @@ pointer_to_func( void *p )
    return pf.f;
 }
 
-static INLINE void *
+static inline void *
 func_to_pointer( func_pointer f )
 {
    union {
diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h
index b2dd44df230..366801545ed 100644
--- a/src/gallium/auxiliary/util/u_prim.h
+++ b/src/gallium/auxiliary/util/u_prim.h
@@ -46,7 +46,7 @@ struct u_prim_vertex_count {
  * Decompose a primitive that is a loop, a strip, or a fan.  Return the
  * original primitive if it is already decomposed.
  */
-static INLINE unsigned
+static inline unsigned
 u_decomposed_prim(unsigned prim)
 {
    switch (prim) {
@@ -71,7 +71,7 @@ u_decomposed_prim(unsigned prim)
  * Reduce a primitive to one of PIPE_PRIM_POINTS, PIPE_PRIM_LINES, and
  * PIPE_PRIM_TRIANGLES.
  */
-static INLINE unsigned
+static inline unsigned
 u_reduced_prim(unsigned prim)
 {
    switch (prim) {
@@ -91,7 +91,7 @@ u_reduced_prim(unsigned prim)
 /**
  * Re-assemble a primitive to remove its adjacency.
  */
-static INLINE unsigned
+static inline unsigned
 u_assembled_prim(unsigned prim)
 {
    switch (prim) {
@@ -113,7 +113,7 @@ u_assembled_prim(unsigned prim)
  * source file, it will increase the size of the binary slightly more than
  * expected because of the use of a table.
  */
-static INLINE const struct u_prim_vertex_count *
+static inline const struct u_prim_vertex_count *
 u_prim_vertex_count(unsigned prim)
 {
    static const struct u_prim_vertex_count prim_table[PIPE_PRIM_MAX] = {
@@ -140,7 +140,7 @@ u_prim_vertex_count(unsigned prim)
  * Given a vertex count, return the number of primitives.
  * For polygons, return the number of triangles.
  */
-static INLINE unsigned
+static inline unsigned
 u_prims_for_vertices(unsigned prim, unsigned num)
 {
    const struct u_prim_vertex_count *info = u_prim_vertex_count(prim);
@@ -151,7 +151,7 @@ u_prims_for_vertices(unsigned prim, unsigned num)
    return 1 + ((num - info->min) / info->incr);
 }
 
-static INLINE boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr )
+static inline boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr )
 {
    const struct u_prim_vertex_count *count = u_prim_vertex_count(pipe_prim);
 
@@ -159,7 +159,7 @@ static INLINE boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr )
 }
 
 
-static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
+static inline boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
 {
    const struct u_prim_vertex_count *count = u_prim_vertex_count(pipe_prim);
 
@@ -174,7 +174,7 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
    }
 }
 
-static INLINE unsigned
+static inline unsigned
 u_vertices_per_prim(int primitive)
 {
    switch(primitive) {
@@ -216,7 +216,7 @@ u_vertices_per_prim(int primitive)
  * statistics depend on knowing the exact number of decomposed
  * primitives for a set of vertices.
  */
-static INLINE unsigned
+static inline unsigned
 u_decomposed_prims_for_vertices(int primitive, int vertices)
 {
    switch (primitive) {
@@ -263,7 +263,7 @@ u_decomposed_prims_for_vertices(int primitive, int vertices)
  * count.  Each quad is treated as two triangles.  Polygons are treated as
  * triangle fans.
  */
-static INLINE unsigned
+static inline unsigned
 u_reduced_prims_for_vertices(int primitive, int vertices)
 {
    switch (primitive) {
diff --git a/src/gallium/auxiliary/util/u_range.h b/src/gallium/auxiliary/util/u_range.h
index efe25ef5e42..a1da5e5a6f0 100644
--- a/src/gallium/auxiliary/util/u_range.h
+++ b/src/gallium/auxiliary/util/u_range.h
@@ -47,7 +47,7 @@ struct util_range {
 };
 
 
-static INLINE void
+static inline void
 util_range_set_empty(struct util_range *range)
 {
    range->start = ~0;
@@ -55,7 +55,7 @@ util_range_set_empty(struct util_range *range)
 }
 
 /* This is like a union of two sets. */
-static INLINE void
+static inline void
 util_range_add(struct util_range *range, unsigned start, unsigned end)
 {
    if (start < range->start || end > range->end) {
@@ -66,7 +66,7 @@ util_range_add(struct util_range *range, unsigned start, unsigned end)
    }
 }
 
-static INLINE boolean
+static inline boolean
 util_ranges_intersect(struct util_range *range, unsigned start, unsigned end)
 {
    return MAX2(start, range->start) < MIN2(end, range->end);
@@ -75,14 +75,14 @@ util_ranges_intersect(struct util_range *range, unsigned start, unsigned end)
 
 /* Init/deinit */
 
-static INLINE void
+static inline void
 util_range_init(struct util_range *range)
 {
    pipe_mutex_init(range->write_mutex);
    util_range_set_empty(range);
 }
 
-static INLINE void
+static inline void
 util_range_destroy(struct util_range *range)
 {
    pipe_mutex_destroy(range->write_mutex);
diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index cf29dff0d02..b26f671f313 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -43,7 +43,7 @@ struct u_rect {
 
 /* Do two rectangles intersect?
  */
-static INLINE boolean
+static inline boolean
 u_rect_test_intersection(const struct u_rect *a,
                          const struct u_rect *b)
 {
@@ -55,7 +55,7 @@ u_rect_test_intersection(const struct u_rect *a,
 
 /* Find the intersection of two rectangles known to intersect.
  */
-static INLINE void
+static inline void
 u_rect_find_intersection(const struct u_rect *a,
                          struct u_rect *b)
 {
@@ -68,13 +68,13 @@ u_rect_find_intersection(const struct u_rect *a,
 }
 
 
-static INLINE int
+static inline int
 u_rect_area(const struct u_rect *r)
 {
    return (r->x1 - r->x0) * (r->y1 - r->y0);
 }
 
-static INLINE void
+static inline void
 u_rect_possible_intersection(const struct u_rect *a,
                              struct u_rect *b)
 {
@@ -88,7 +88,7 @@ u_rect_possible_intersection(const struct u_rect *a,
 
 /* Set @d to a rectangle that covers both @a and @b.
  */
-static INLINE void
+static inline void
 u_rect_union(struct u_rect *d, const struct u_rect *a, const struct u_rect *b)
 {
    d->x0 = MIN2(a->x0, b->x0);
diff --git a/src/gallium/auxiliary/util/u_resource.h b/src/gallium/auxiliary/util/u_resource.h
index a5e091fd66e..6736476f4da 100644
--- a/src/gallium/auxiliary/util/u_resource.h
+++ b/src/gallium/auxiliary/util/u_resource.h
@@ -36,7 +36,7 @@ util_resource_size(const struct pipe_resource *res);
  *
  * Note that this function returns true for single-layered array textures.
  */
-static INLINE boolean
+static inline boolean
 util_resource_is_array_texture(const struct pipe_resource *res)
 {
    switch (res->target) {
diff --git a/src/gallium/auxiliary/util/u_ringbuffer.c b/src/gallium/auxiliary/util/u_ringbuffer.c
index 648b105b137..5816b781660 100644
--- a/src/gallium/auxiliary/util/u_ringbuffer.c
+++ b/src/gallium/auxiliary/util/u_ringbuffer.c
@@ -56,7 +56,7 @@ void util_ringbuffer_destroy( struct util_ringbuffer *ring )
 /**
  * Return number of free entries in the ring
  */
-static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring )
+static inline unsigned util_ringbuffer_space( const struct util_ringbuffer *ring )
 {
    return (ring->tail - (ring->head + 1)) & ring->mask;
 }
@@ -64,7 +64,7 @@ static INLINE unsigned util_ringbuffer_space( const struct util_ringbuffer *ring
 /**
  * Is the ring buffer empty?
  */
-static INLINE boolean util_ringbuffer_empty( const struct util_ringbuffer *ring )
+static inline boolean util_ringbuffer_empty( const struct util_ringbuffer *ring )
 {
    return util_ringbuffer_space(ring) == ring->mask;
 }
diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c
deleted file mode 100644
index 39e9b70d0f8..00000000000
--- a/src/gallium/auxiliary/util/u_snprintf.c
+++ /dev/null
@@ -1,1480 +0,0 @@
-/*
- * Copyright (c) 1995 Patrick Powell.
- *
- * This code is based on code written by Patrick Powell <[email protected]>.
- * It may be used for any purpose as long as this notice remains intact on all
- * source code distributions.
- */
-
-/*
- * Copyright (c) 2008 Holger Weiss.
- *
- * This version of the code is maintained by Holger Weiss <[email protected]>.
- * My changes to the code may freely be used, modified and/or redistributed for
- * any purpose.  It would be nice if additions and fixes to this file (including
- * trivial code cleanups) would be sent back in order to let me include them in
- * the version available at <http://www.jhweiss.de/software/snprintf.html>.
- * However, this is not a requirement for using or redistributing (possibly
- * modified) versions of this file, nor is leaving this notice intact mandatory.
- */
-
-/*
- * History
- *
- * 2008-01-20 Holger Weiss <[email protected]> for C99-snprintf 1.1:
- *
- * 	Fixed the detection of infinite floating point values on IRIX (and
- * 	possibly other systems) and applied another few minor cleanups.
- *
- * 2008-01-06 Holger Weiss <[email protected]> for C99-snprintf 1.0:
- *
- * 	Added a lot of new features, fixed many bugs, and incorporated various
- * 	improvements done by Andrew Tridgell <[email protected]>, Russ Allbery
- * 	<[email protected]>, Hrvoje Niksic <[email protected]>, Damien Miller
- * 	<[email protected]>, and others for the Samba, INN, Wget, and OpenSSH
- * 	projects.  The additions include: support the "e", "E", "g", "G", and
- * 	"F" conversion specifiers (and use conversion style "f" or "F" for the
- * 	still unsupported "a" and "A" specifiers); support the "hh", "ll", "j",
- * 	"t", and "z" length modifiers; support the "#" flag and the (non-C99)
- * 	"'" flag; use localeconv(3) (if available) to get both the current
- * 	locale's decimal point character and the separator between groups of
- * 	digits; fix the handling of various corner cases of field width and
- * 	precision specifications; fix various floating point conversion bugs;
- * 	handle infinite and NaN floating point values; don't attempt to write to
- * 	the output buffer (which may be NULL) if a size of zero was specified;
- * 	check for integer overflow of the field width, precision, and return
- * 	values and during the floating point conversion; use the OUTCHAR() macro
- * 	instead of a function for better performance; provide asprintf(3) and
- * 	vasprintf(3) functions; add new test cases.  The replacement functions
- * 	have been renamed to use an "rpl_" prefix, the function calls in the
- * 	main project (and in this file) must be redefined accordingly for each
- * 	replacement function which is needed (by using Autoconf or other means).
- * 	Various other minor improvements have been applied and the coding style
- * 	was cleaned up for consistency.
- *
- * 2007-07-23 Holger Weiss <[email protected]> for Mutt 1.5.13:
- *
- * 	C99 compliant snprintf(3) and vsnprintf(3) functions return the number
- * 	of characters that would have been written to a sufficiently sized
- * 	buffer (excluding the '\0').  The original code simply returned the
- * 	length of the resulting output string, so that's been fixed.
- *
- * 1998-03-05 Michael Elkins <[email protected]> for Mutt 0.90.8:
- *
- * 	The original code assumed that both snprintf(3) and vsnprintf(3) were
- * 	missing.  Some systems only have snprintf(3) but not vsnprintf(3), so
- * 	the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF.
- *
- * 1998-01-27 Thomas Roessler <[email protected]> for Mutt 0.89i:
- *
- * 	The PGP code was using unsigned hexadecimal formats.  Unfortunately,
- * 	unsigned formats simply didn't work.
- *
- * 1997-10-22 Brandon Long <[email protected]> for Mutt 0.87.1:
- *
- * 	Ok, added some minimal floating point support, which means this probably
- * 	requires libm on most operating systems.  Don't yet support the exponent
- * 	(e,E) and sigfig (g,G).  Also, fmtint() was pretty badly broken, it just
- * 	wasn't being exercised in ways which showed it, so that's been fixed.
- * 	Also, formatted the code to Mutt conventions, and removed dead code left
- * 	over from the original.  Also, there is now a builtin-test, run with:
- * 	gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm && ./snprintf
- *
- * 2996-09-15 Brandon Long <[email protected]> for Mutt 0.43:
- *
- * 	This was ugly.  It is still ugly.  I opted out of floating point
- * 	numbers, but the formatter understands just about everything from the
- * 	normal C string format, at least as far as I can tell from the Solaris
- * 	2.5 printf(3S) man page.
- */
-
-/*
- * ToDo
- *
- * - Add wide character support.
- * - Add support for "%a" and "%A" conversions.
- * - Create test routines which predefine the expected results.  Our test cases
- *   usually expose bugs in system implementations rather than in ours :-)
- */
-
-/*
- * Usage
- *
- * 1) The following preprocessor macros should be defined to 1 if the feature or
- *    file in question is available on the target system (by using Autoconf or
- *    other means), though basic functionality should be available as long as
- *    HAVE_STDARG_H and HAVE_STDLIB_H are defined correctly:
- *
- *    	HAVE_VSNPRINTF
- *    	HAVE_SNPRINTF
- *    	HAVE_VASPRINTF
- *    	HAVE_ASPRINTF
- *    	HAVE_STDARG_H
- *    	HAVE_STDDEF_H
- *    	HAVE_STDINT_H
- *    	HAVE_STDLIB_H
- *    	HAVE_INTTYPES_H
- *    	HAVE_LOCALE_H
- *    	HAVE_LOCALECONV
- *    	HAVE_LCONV_DECIMAL_POINT
- *    	HAVE_LCONV_THOUSANDS_SEP
- *    	HAVE_LONG_DOUBLE
- *    	HAVE_LONG_LONG_INT
- *    	HAVE_UNSIGNED_LONG_LONG_INT
- *    	HAVE_INTMAX_T
- *    	HAVE_UINTMAX_T
- *    	HAVE_UINTPTR_T
- *    	HAVE_PTRDIFF_T
- *    	HAVE_VA_COPY
- *    	HAVE___VA_COPY
- *
- * 2) The calls to the functions which should be replaced must be redefined
- *    throughout the project files (by using Autoconf or other means):
- *
- *    	#define vsnprintf rpl_vsnprintf
- *    	#define snprintf rpl_snprintf
- *    	#define vasprintf rpl_vasprintf
- *    	#define asprintf rpl_asprintf
- *
- * 3) The required replacement functions should be declared in some header file
- *    included throughout the project files:
- *
- *    	#if HAVE_CONFIG_H
- *    	#include <config.h>
- *    	#endif
- *    	#if HAVE_STDARG_H
- *    	#include <stdarg.h>
- *    	#if !HAVE_VSNPRINTF
- *    	int rpl_vsnprintf(char *, size_t, const char *, va_list);
- *    	#endif
- *    	#if !HAVE_SNPRINTF
- *    	int rpl_snprintf(char *, size_t, const char *, ...);
- *    	#endif
- *    	#if !HAVE_VASPRINTF
- *    	int rpl_vasprintf(char **, const char *, va_list);
- *    	#endif
- *    	#if !HAVE_ASPRINTF
- *    	int rpl_asprintf(char **, const char *, ...);
- *    	#endif
- *    	#endif
- *
- * Autoconf macros for handling step 1 and step 2 are available at
- * <http://www.jhweiss.de/software/snprintf.html>.
- */
-
-#include "pipe/p_config.h"
-
-#if HAVE_CONFIG_H
-#include <config.h>
-#else
-#ifdef _MSC_VER
-#define vsnprintf util_vsnprintf
-#define snprintf util_snprintf
-#define HAVE_VSNPRINTF 0
-#define HAVE_SNPRINTF 0
-#define HAVE_VASPRINTF 1 /* not needed */
-#define HAVE_ASPRINTF 1 /* not needed */
-#define HAVE_STDARG_H 1
-#define HAVE_STDDEF_H 1
-#define HAVE_STDINT_H 1
-#define HAVE_STDLIB_H 1
-#define HAVE_INTTYPES_H 0
-#define HAVE_LOCALE_H 0
-#define HAVE_LOCALECONV 0
-#define HAVE_LCONV_DECIMAL_POINT 0
-#define HAVE_LCONV_THOUSANDS_SEP 0
-#define HAVE_LONG_DOUBLE 0
-#define HAVE_LONG_LONG_INT 1
-#define HAVE_UNSIGNED_LONG_LONG_INT 1
-#define HAVE_INTMAX_T 0
-#define HAVE_UINTMAX_T 0
-#define HAVE_UINTPTR_T 1
-#define HAVE_PTRDIFF_T 1
-#define HAVE_VA_COPY 0
-#define HAVE___VA_COPY 0
-#else
-#define HAVE_VSNPRINTF 1
-#define HAVE_SNPRINTF 1
-#define HAVE_VASPRINTF 1
-#define HAVE_ASPRINTF 1
-#endif
-#endif	/* HAVE_CONFIG_H */
-
-#if !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || !HAVE_VASPRINTF
-#include <stdio.h>	/* For NULL, size_t, vsnprintf(3), and vasprintf(3). */
-#ifdef VA_START
-#undef VA_START
-#endif	/* defined(VA_START) */
-#ifdef VA_SHIFT
-#undef VA_SHIFT
-#endif	/* defined(VA_SHIFT) */
-#if HAVE_STDARG_H
-#include <stdarg.h>
-#define VA_START(ap, last) va_start(ap, last)
-#define VA_SHIFT(ap, value, type) /* No-op for ANSI C. */
-#else	/* Assume <varargs.h> is available. */
-#include <varargs.h>
-#define VA_START(ap, last) va_start(ap)	/* "last" is ignored. */
-#define VA_SHIFT(ap, value, type) value = va_arg(ap, type)
-#endif	/* HAVE_STDARG_H */
-
-#if !HAVE_VASPRINTF
-#if HAVE_STDLIB_H
-#include <stdlib.h>	/* For malloc(3). */
-#endif	/* HAVE_STDLIB_H */
-#ifdef VA_COPY
-#undef VA_COPY
-#endif	/* defined(VA_COPY) */
-#ifdef VA_END_COPY
-#undef VA_END_COPY
-#endif	/* defined(VA_END_COPY) */
-#if HAVE_VA_COPY
-#define VA_COPY(dest, src) va_copy(dest, src)
-#define VA_END_COPY(ap) va_end(ap)
-#elif HAVE___VA_COPY
-#define VA_COPY(dest, src) __va_copy(dest, src)
-#define VA_END_COPY(ap) va_end(ap)
-#else
-#define VA_COPY(dest, src) (void)mymemcpy(&dest, &src, sizeof(va_list))
-#define VA_END_COPY(ap) /* No-op. */
-#define NEED_MYMEMCPY 1
-static void *mymemcpy(void *, void *, size_t);
-#endif	/* HAVE_VA_COPY */
-#endif	/* !HAVE_VASPRINTF */
-
-#if !HAVE_VSNPRINTF
-#include <limits.h>	/* For *_MAX. */
-#if HAVE_INTTYPES_H
-#include <inttypes.h>	/* For intmax_t (if not defined in <stdint.h>). */
-#endif	/* HAVE_INTTYPES_H */
-#if HAVE_LOCALE_H
-#include <locale.h>	/* For localeconv(3). */
-#endif	/* HAVE_LOCALE_H */
-#if HAVE_STDDEF_H
-#include <stddef.h>	/* For ptrdiff_t. */
-#endif	/* HAVE_STDDEF_H */
-#if HAVE_STDINT_H
-#include <stdint.h>	/* For intmax_t. */
-#endif	/* HAVE_STDINT_H */
-
-/* Support for unsigned long long int.  We may also need ULLONG_MAX. */
-#ifndef ULONG_MAX	/* We may need ULONG_MAX as a fallback. */
-#ifdef UINT_MAX
-#define ULONG_MAX UINT_MAX
-#else
-#define ULONG_MAX INT_MAX
-#endif	/* defined(UINT_MAX) */
-#endif	/* !defined(ULONG_MAX) */
-#ifdef ULLONG
-#undef ULLONG
-#endif	/* defined(ULLONG) */
-#if HAVE_UNSIGNED_LONG_LONG_INT
-#define ULLONG unsigned long long int
-#ifndef ULLONG_MAX
-#define ULLONG_MAX ULONG_MAX
-#endif	/* !defined(ULLONG_MAX) */
-#else
-#define ULLONG unsigned long int
-#ifdef ULLONG_MAX
-#undef ULLONG_MAX
-#endif	/* defined(ULLONG_MAX) */
-#define ULLONG_MAX ULONG_MAX
-#endif	/* HAVE_LONG_LONG_INT */
-
-/* Support for uintmax_t.  We also need UINTMAX_MAX. */
-#ifdef UINTMAX_T
-#undef UINTMAX_T
-#endif	/* defined(UINTMAX_T) */
-#if HAVE_UINTMAX_T || defined(uintmax_t)
-#define UINTMAX_T uintmax_t
-#ifndef UINTMAX_MAX
-#define UINTMAX_MAX ULLONG_MAX
-#endif	/* !defined(UINTMAX_MAX) */
-#else
-#define UINTMAX_T ULLONG
-#ifdef UINTMAX_MAX
-#undef UINTMAX_MAX
-#endif	/* defined(UINTMAX_MAX) */
-#define UINTMAX_MAX ULLONG_MAX
-#endif	/* HAVE_UINTMAX_T || defined(uintmax_t) */
-
-/* Support for long double. */
-#ifndef LDOUBLE
-#if HAVE_LONG_DOUBLE
-#define LDOUBLE long double
-#else
-#define LDOUBLE double
-#endif	/* HAVE_LONG_DOUBLE */
-#endif	/* !defined(LDOUBLE) */
-
-/* Support for long long int. */
-#ifndef LLONG
-#if HAVE_LONG_LONG_INT
-#define LLONG long long int
-#else
-#define LLONG long int
-#endif	/* HAVE_LONG_LONG_INT */
-#endif	/* !defined(LLONG) */
-
-/* Support for intmax_t. */
-#ifndef INTMAX_T
-#if HAVE_INTMAX_T || defined(intmax_t)
-#define INTMAX_T intmax_t
-#else
-#define INTMAX_T LLONG
-#endif	/* HAVE_INTMAX_T || defined(intmax_t) */
-#endif	/* !defined(INTMAX_T) */
-
-/* Support for uintptr_t. */
-#ifndef UINTPTR_T
-#if HAVE_UINTPTR_T || defined(uintptr_t)
-#define UINTPTR_T uintptr_t
-#else
-#define UINTPTR_T unsigned long int
-#endif	/* HAVE_UINTPTR_T || defined(uintptr_t) */
-#endif	/* !defined(UINTPTR_T) */
-
-/* Support for ptrdiff_t. */
-#ifndef PTRDIFF_T
-#if HAVE_PTRDIFF_T || defined(ptrdiff_t)
-#define PTRDIFF_T ptrdiff_t
-#else
-#define PTRDIFF_T long int
-#endif	/* HAVE_PTRDIFF_T || defined(ptrdiff_t) */
-#endif	/* !defined(PTRDIFF_T) */
-
-/*
- * We need an unsigned integer type corresponding to ptrdiff_t (cf. C99:
- * 7.19.6.1, 7).  However, we'll simply use PTRDIFF_T and convert it to an
- * unsigned type if necessary.  This should work just fine in practice.
- */
-#ifndef UPTRDIFF_T
-#define UPTRDIFF_T PTRDIFF_T
-#endif	/* !defined(UPTRDIFF_T) */
-
-/*
- * We need a signed integer type corresponding to size_t (cf. C99: 7.19.6.1, 7).
- * However, we'll simply use size_t and convert it to a signed type if
- * necessary.  This should work just fine in practice.
- */
-#ifndef SSIZE_T
-#define SSIZE_T size_t
-#endif	/* !defined(SSIZE_T) */
-
-/* Either ERANGE or E2BIG should be available everywhere. */
-#ifndef ERANGE
-#define ERANGE E2BIG
-#endif	/* !defined(ERANGE) */
-#ifndef EOVERFLOW
-#define EOVERFLOW ERANGE
-#endif	/* !defined(EOVERFLOW) */
-
-/*
- * Buffer size to hold the octal string representation of UINT128_MAX without
- * nul-termination ("3777777777777777777777777777777777777777777").
- */
-#ifdef MAX_CONVERT_LENGTH
-#undef MAX_CONVERT_LENGTH
-#endif	/* defined(MAX_CONVERT_LENGTH) */
-#define MAX_CONVERT_LENGTH      43
-
-/* Format read states. */
-#define PRINT_S_DEFAULT         0
-#define PRINT_S_FLAGS           1
-#define PRINT_S_WIDTH           2
-#define PRINT_S_DOT             3
-#define PRINT_S_PRECISION       4
-#define PRINT_S_MOD             5
-#define PRINT_S_CONV            6
-
-/* Format flags. */
-#define PRINT_F_MINUS           (1 << 0)
-#define PRINT_F_PLUS            (1 << 1)
-#define PRINT_F_SPACE           (1 << 2)
-#define PRINT_F_NUM             (1 << 3)
-#define PRINT_F_ZERO            (1 << 4)
-#define PRINT_F_QUOTE           (1 << 5)
-#define PRINT_F_UP              (1 << 6)
-#define PRINT_F_UNSIGNED        (1 << 7)
-#define PRINT_F_TYPE_G          (1 << 8)
-#define PRINT_F_TYPE_E          (1 << 9)
-
-/* Conversion flags. */
-#define PRINT_C_CHAR            1
-#define PRINT_C_SHORT           2
-#define PRINT_C_LONG            3
-#define PRINT_C_LLONG           4
-#define PRINT_C_LDOUBLE         5
-#define PRINT_C_SIZE            6
-#define PRINT_C_PTRDIFF         7
-#define PRINT_C_INTMAX          8
-
-#ifndef MAX
-#define MAX(x, y) ((x >= y) ? x : y)
-#endif	/* !defined(MAX) */
-#ifndef CHARTOINT
-#define CHARTOINT(ch) (ch - '0')
-#endif	/* !defined(CHARTOINT) */
-#ifndef ISDIGIT
-#define ISDIGIT(ch) ('0' <= (unsigned char)ch && (unsigned char)ch <= '9')
-#endif	/* !defined(ISDIGIT) */
-#ifndef ISNAN
-#define ISNAN(x) (x != x)
-#endif	/* !defined(ISNAN) */
-#ifndef ISINF
-#define ISINF(x) (x != 0.0 && x + x == x)
-#endif	/* !defined(ISINF) */
-
-#ifdef OUTCHAR
-#undef OUTCHAR
-#endif	/* defined(OUTCHAR) */
-#define OUTCHAR(str, len, size, ch)                                          \
-do {                                                                         \
-	if (len + 1 < size)                                                  \
-		str[len] = ch;                                               \
-	(len)++;                                                             \
-} while (/* CONSTCOND */ 0)
-
-static void fmtstr(char *, size_t *, size_t, const char *, int, int, int);
-static void fmtint(char *, size_t *, size_t, INTMAX_T, int, int, int, int);
-static void fmtflt(char *, size_t *, size_t, LDOUBLE, int, int, int, int *);
-static void printsep(char *, size_t *, size_t);
-static int getnumsep(int);
-static int getexponent(LDOUBLE);
-static int convert(UINTMAX_T, char *, size_t, int, int);
-static UINTMAX_T cast(LDOUBLE);
-static UINTMAX_T myround(LDOUBLE);
-static LDOUBLE mypow10(int);
-
-int
-util_vsnprintf(char *str, size_t size, const char *format, va_list args)
-{
-	LDOUBLE fvalue;
-	INTMAX_T value;
-	unsigned char cvalue;
-	const char *strvalue;
-	INTMAX_T *intmaxptr;
-	PTRDIFF_T *ptrdiffptr;
-	SSIZE_T *sizeptr;
-	LLONG *llongptr;
-	long int *longptr;
-	int *intptr;
-	short int *shortptr;
-	signed char *charptr;
-	size_t len = 0;
-	int overflow = 0;
-	int base = 0;
-	int cflags = 0;
-	int flags = 0;
-	int width = 0;
-	int precision = -1;
-	int state = PRINT_S_DEFAULT;
-	char ch = *format++;
-
-	/*
-	 * C99 says: "If `n' is zero, nothing is written, and `s' may be a null
-	 * pointer." (7.19.6.5, 2)  We're forgiving and allow a NULL pointer
-	 * even if a size larger than zero was specified.  At least NetBSD's
-	 * snprintf(3) does the same, as well as other versions of this file.
-	 * (Though some of these versions will write to a non-NULL buffer even
-	 * if a size of zero was specified, which violates the standard.)
-	 */
-	if (str == NULL && size != 0)
-		size = 0;
-
-	while (ch != '\0')
-		switch (state) {
-		case PRINT_S_DEFAULT:
-			if (ch == '%')
-				state = PRINT_S_FLAGS;
-			else
-				OUTCHAR(str, len, size, ch);
-			ch = *format++;
-			break;
-		case PRINT_S_FLAGS:
-			switch (ch) {
-			case '-':
-				flags |= PRINT_F_MINUS;
-				ch = *format++;
-				break;
-			case '+':
-				flags |= PRINT_F_PLUS;
-				ch = *format++;
-				break;
-			case ' ':
-				flags |= PRINT_F_SPACE;
-				ch = *format++;
-				break;
-			case '#':
-				flags |= PRINT_F_NUM;
-				ch = *format++;
-				break;
-			case '0':
-				flags |= PRINT_F_ZERO;
-				ch = *format++;
-				break;
-			case '\'':	/* SUSv2 flag (not in C99). */
-				flags |= PRINT_F_QUOTE;
-				ch = *format++;
-				break;
-			default:
-				state = PRINT_S_WIDTH;
-				break;
-			}
-			break;
-		case PRINT_S_WIDTH:
-			if (ISDIGIT(ch)) {
-				ch = CHARTOINT(ch);
-				if (width > (INT_MAX - ch) / 10) {
-					overflow = 1;
-					goto out;
-				}
-				width = 10 * width + ch;
-				ch = *format++;
-			} else if (ch == '*') {
-				/*
-				 * C99 says: "A negative field width argument is
-				 * taken as a `-' flag followed by a positive
-				 * field width." (7.19.6.1, 5)
-				 */
-				if ((width = va_arg(args, int)) < 0) {
-					flags |= PRINT_F_MINUS;
-					width = -width;
-				}
-				ch = *format++;
-				state = PRINT_S_DOT;
-			} else
-				state = PRINT_S_DOT;
-			break;
-		case PRINT_S_DOT:
-			if (ch == '.') {
-				state = PRINT_S_PRECISION;
-				ch = *format++;
-			} else
-				state = PRINT_S_MOD;
-			break;
-		case PRINT_S_PRECISION:
-			if (precision == -1)
-				precision = 0;
-			if (ISDIGIT(ch)) {
-				ch = CHARTOINT(ch);
-				if (precision > (INT_MAX - ch) / 10) {
-					overflow = 1;
-					goto out;
-				}
-				precision = 10 * precision + ch;
-				ch = *format++;
-			} else if (ch == '*') {
-				/*
-				 * C99 says: "A negative precision argument is
-				 * taken as if the precision were omitted."
-				 * (7.19.6.1, 5)
-				 */
-				if ((precision = va_arg(args, int)) < 0)
-					precision = -1;
-				ch = *format++;
-				state = PRINT_S_MOD;
-			} else
-				state = PRINT_S_MOD;
-			break;
-		case PRINT_S_MOD:
-			switch (ch) {
-			case 'h':
-				ch = *format++;
-				if (ch == 'h') {	/* It's a char. */
-					ch = *format++;
-					cflags = PRINT_C_CHAR;
-				} else
-					cflags = PRINT_C_SHORT;
-				break;
-			case 'l':
-				ch = *format++;
-				if (ch == 'l') {	/* It's a long long. */
-					ch = *format++;
-					cflags = PRINT_C_LLONG;
-				} else
-					cflags = PRINT_C_LONG;
-				break;
-			case 'L':
-				cflags = PRINT_C_LDOUBLE;
-				ch = *format++;
-				break;
-			case 'j':
-				cflags = PRINT_C_INTMAX;
-				ch = *format++;
-				break;
-			case 't':
-				cflags = PRINT_C_PTRDIFF;
-				ch = *format++;
-				break;
-			case 'z':
-				cflags = PRINT_C_SIZE;
-				ch = *format++;
-				break;
-			}
-			state = PRINT_S_CONV;
-			break;
-		case PRINT_S_CONV:
-			switch (ch) {
-			case 'd':
-				/* FALLTHROUGH */
-			case 'i':
-				switch (cflags) {
-				case PRINT_C_CHAR:
-					value = (signed char)va_arg(args, int);
-					break;
-				case PRINT_C_SHORT:
-					value = (short int)va_arg(args, int);
-					break;
-				case PRINT_C_LONG:
-					value = va_arg(args, long int);
-					break;
-				case PRINT_C_LLONG:
-					value = va_arg(args, LLONG);
-					break;
-				case PRINT_C_SIZE:
-					value = va_arg(args, SSIZE_T);
-					break;
-				case PRINT_C_INTMAX:
-					value = va_arg(args, INTMAX_T);
-					break;
-				case PRINT_C_PTRDIFF:
-					value = va_arg(args, PTRDIFF_T);
-					break;
-				default:
-					value = va_arg(args, int);
-					break;
-				}
-				fmtint(str, &len, size, value, 10, width,
-				    precision, flags);
-				break;
-			case 'X':
-				flags |= PRINT_F_UP;
-				/* FALLTHROUGH */
-			case 'x':
-				base = 16;
-				/* FALLTHROUGH */
-			case 'o':
-				if (base == 0)
-					base = 8;
-				/* FALLTHROUGH */
-			case 'u':
-				if (base == 0)
-					base = 10;
-				flags |= PRINT_F_UNSIGNED;
-				switch (cflags) {
-				case PRINT_C_CHAR:
-					value = (unsigned char)va_arg(args,
-					    unsigned int);
-					break;
-				case PRINT_C_SHORT:
-					value = (unsigned short int)va_arg(args,
-					    unsigned int);
-					break;
-				case PRINT_C_LONG:
-					value = va_arg(args, unsigned long int);
-					break;
-				case PRINT_C_LLONG:
-					value = va_arg(args, ULLONG);
-					break;
-				case PRINT_C_SIZE:
-					value = va_arg(args, size_t);
-					break;
-				case PRINT_C_INTMAX:
-					value = va_arg(args, UINTMAX_T);
-					break;
-				case PRINT_C_PTRDIFF:
-					value = va_arg(args, UPTRDIFF_T);
-					break;
-				default:
-					value = va_arg(args, unsigned int);
-					break;
-				}
-				fmtint(str, &len, size, value, base, width,
-				    precision, flags);
-				break;
-			case 'A':
-				/* Not yet supported, we'll use "%F". */
-				/* FALLTHROUGH */
-			case 'F':
-				flags |= PRINT_F_UP;
-			case 'a':
-				/* Not yet supported, we'll use "%f". */
-				/* FALLTHROUGH */
-			case 'f':
-				if (cflags == PRINT_C_LDOUBLE)
-					fvalue = va_arg(args, LDOUBLE);
-				else
-					fvalue = va_arg(args, double);
-				fmtflt(str, &len, size, fvalue, width,
-				    precision, flags, &overflow);
-				if (overflow)
-					goto out;
-				break;
-			case 'E':
-				flags |= PRINT_F_UP;
-				/* FALLTHROUGH */
-			case 'e':
-				flags |= PRINT_F_TYPE_E;
-				if (cflags == PRINT_C_LDOUBLE)
-					fvalue = va_arg(args, LDOUBLE);
-				else
-					fvalue = va_arg(args, double);
-				fmtflt(str, &len, size, fvalue, width,
-				    precision, flags, &overflow);
-				if (overflow)
-					goto out;
-				break;
-			case 'G':
-				flags |= PRINT_F_UP;
-				/* FALLTHROUGH */
-			case 'g':
-				flags |= PRINT_F_TYPE_G;
-				if (cflags == PRINT_C_LDOUBLE)
-					fvalue = va_arg(args, LDOUBLE);
-				else
-					fvalue = va_arg(args, double);
-				/*
-				 * If the precision is zero, it is treated as
-				 * one (cf. C99: 7.19.6.1, 8).
-				 */
-				if (precision == 0)
-					precision = 1;
-				fmtflt(str, &len, size, fvalue, width,
-				    precision, flags, &overflow);
-				if (overflow)
-					goto out;
-				break;
-			case 'c':
-				cvalue = (unsigned char)va_arg(args, int);
-				OUTCHAR(str, len, size, cvalue);
-				break;
-			case 's':
-				strvalue = va_arg(args, char *);
-				fmtstr(str, &len, size, strvalue, width,
-				    precision, flags);
-				break;
-			case 'p':
-				/*
-				 * C99 says: "The value of the pointer is
-				 * converted to a sequence of printing
-				 * characters, in an implementation-defined
-				 * manner." (C99: 7.19.6.1, 8)
-				 */
-				if ((strvalue = va_arg(args, void *)) == NULL)
-					/*
-					 * We use the glibc format.  BSD prints
-					 * "0x0", SysV "0".
-					 */
-					fmtstr(str, &len, size, "(nil)", width,
-					    -1, flags);
-				else {
-					/*
-					 * We use the BSD/glibc format.  SysV
-					 * omits the "0x" prefix (which we emit
-					 * using the PRINT_F_NUM flag).
-					 */
-					flags |= PRINT_F_NUM;
-					flags |= PRINT_F_UNSIGNED;
-					fmtint(str, &len, size,
-					    (UINTPTR_T)strvalue, 16, width,
-					    precision, flags);
-				}
-				break;
-			case 'n':
-				switch (cflags) {
-				case PRINT_C_CHAR:
-					charptr = va_arg(args, signed char *);
-					*charptr = (signed char)len;
-					break;
-				case PRINT_C_SHORT:
-					shortptr = va_arg(args, short int *);
-					*shortptr = (short int)len;
-					break;
-				case PRINT_C_LONG:
-					longptr = va_arg(args, long int *);
-					*longptr = (long int)len;
-					break;
-				case PRINT_C_LLONG:
-					llongptr = va_arg(args, LLONG *);
-					*llongptr = (LLONG)len;
-					break;
-				case PRINT_C_SIZE:
-					/*
-					 * C99 says that with the "z" length
-					 * modifier, "a following `n' conversion
-					 * specifier applies to a pointer to a
-					 * signed integer type corresponding to
-					 * size_t argument." (7.19.6.1, 7)
-					 */
-					sizeptr = va_arg(args, SSIZE_T *);
-					*sizeptr = len;
-					break;
-				case PRINT_C_INTMAX:
-					intmaxptr = va_arg(args, INTMAX_T *);
-					*intmaxptr = len;
-					break;
-				case PRINT_C_PTRDIFF:
-					ptrdiffptr = va_arg(args, PTRDIFF_T *);
-					*ptrdiffptr = len;
-					break;
-				default:
-					intptr = va_arg(args, int *);
-					*intptr = (int)len;
-					break;
-				}
-				break;
-			case '%':	/* Print a "%" character verbatim. */
-				OUTCHAR(str, len, size, ch);
-				break;
-			default:	/* Skip other characters. */
-				break;
-			}
-			ch = *format++;
-			state = PRINT_S_DEFAULT;
-			base = cflags = flags = width = 0;
-			precision = -1;
-			break;
-		}
-out:
-	if (len < size)
-		str[len] = '\0';
-	else if (size > 0)
-		str[size - 1] = '\0';
-
-	if (overflow || len >= INT_MAX) {
-		return -1;
-	}
-	return (int)len;
-}
-
-static void
-fmtstr(char *str, size_t *len, size_t size, const char *value, int width,
-       int precision, int flags)
-{
-	int padlen, strln;	/* Amount to pad. */
-	int noprecision = (precision == -1);
-
-	if (value == NULL)	/* We're forgiving. */
-		value = "(null)";
-
-	/* If a precision was specified, don't read the string past it. */
-	for (strln = 0; value[strln] != '\0' &&
-	    (noprecision || strln < precision); strln++)
-		continue;
-
-	if ((padlen = width - strln) < 0)
-		padlen = 0;
-	if (flags & PRINT_F_MINUS)	/* Left justify. */
-		padlen = -padlen;
-
-	while (padlen > 0) {	/* Leading spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		padlen--;
-	}
-	while (*value != '\0' && (noprecision || precision-- > 0)) {
-		OUTCHAR(str, *len, size, *value);
-		value++;
-	}
-	while (padlen < 0) {	/* Trailing spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		padlen++;
-	}
-}
-
-static void
-fmtint(char *str, size_t *len, size_t size, INTMAX_T value, int base, int width,
-       int precision, int flags)
-{
-	UINTMAX_T uvalue;
-	char iconvert[MAX_CONVERT_LENGTH];
-	char sign = 0;
-	char hexprefix = 0;
-	int spadlen = 0;	/* Amount to space pad. */
-	int zpadlen = 0;	/* Amount to zero pad. */
-	int pos;
-	int separators = (flags & PRINT_F_QUOTE);
-	int noprecision = (precision == -1);
-
-	if (flags & PRINT_F_UNSIGNED)
-		uvalue = value;
-	else {
-		uvalue = (value >= 0) ? value : -value;
-		if (value < 0)
-			sign = '-';
-		else if (flags & PRINT_F_PLUS)	/* Do a sign. */
-			sign = '+';
-		else if (flags & PRINT_F_SPACE)
-			sign = ' ';
-	}
-
-	pos = convert(uvalue, iconvert, sizeof(iconvert), base,
-	    flags & PRINT_F_UP);
-
-	if (flags & PRINT_F_NUM && uvalue != 0) {
-		/*
-		 * C99 says: "The result is converted to an `alternative form'.
-		 * For `o' conversion, it increases the precision, if and only
-		 * if necessary, to force the first digit of the result to be a
-		 * zero (if the value and precision are both 0, a single 0 is
-		 * printed).  For `x' (or `X') conversion, a nonzero result has
-		 * `0x' (or `0X') prefixed to it." (7.19.6.1, 6)
-		 */
-		switch (base) {
-		case 8:
-			if (precision <= pos)
-				precision = pos + 1;
-			break;
-		case 16:
-			hexprefix = (flags & PRINT_F_UP) ? 'X' : 'x';
-			break;
-		}
-	}
-
-	if (separators)	/* Get the number of group separators we'll print. */
-		separators = getnumsep(pos);
-
-	zpadlen = precision - pos - separators;
-	spadlen = width                         /* Minimum field width. */
-	    - separators                        /* Number of separators. */
-	    - MAX(precision, pos)               /* Number of integer digits. */
-	    - ((sign != 0) ? 1 : 0)             /* Will we print a sign? */
-	    - ((hexprefix != 0) ? 2 : 0);       /* Will we print a prefix? */
-
-	if (zpadlen < 0)
-		zpadlen = 0;
-	if (spadlen < 0)
-		spadlen = 0;
-
-	/*
-	 * C99 says: "If the `0' and `-' flags both appear, the `0' flag is
-	 * ignored.  For `d', `i', `o', `u', `x', and `X' conversions, if a
-	 * precision is specified, the `0' flag is ignored." (7.19.6.1, 6)
-	 */
-	if (flags & PRINT_F_MINUS)	/* Left justify. */
-		spadlen = -spadlen;
-	else if (flags & PRINT_F_ZERO && noprecision) {
-		zpadlen += spadlen;
-		spadlen = 0;
-	}
-	while (spadlen > 0) {	/* Leading spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		spadlen--;
-	}
-	if (sign != 0)	/* Sign. */
-		OUTCHAR(str, *len, size, sign);
-	if (hexprefix != 0) {	/* A "0x" or "0X" prefix. */
-		OUTCHAR(str, *len, size, '0');
-		OUTCHAR(str, *len, size, hexprefix);
-	}
-	while (zpadlen > 0) {	/* Leading zeros. */
-		OUTCHAR(str, *len, size, '0');
-		zpadlen--;
-	}
-	while (pos > 0) {	/* The actual digits. */
-		pos--;
-		OUTCHAR(str, *len, size, iconvert[pos]);
-		if (separators > 0 && pos > 0 && pos % 3 == 0)
-			printsep(str, len, size);
-	}
-	while (spadlen < 0) {	/* Trailing spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		spadlen++;
-	}
-}
-
-static void
-fmtflt(char *str, size_t *len, size_t size, LDOUBLE fvalue, int width,
-       int precision, int flags, int *overflow)
-{
-	LDOUBLE ufvalue;
-	UINTMAX_T intpart;
-	UINTMAX_T fracpart;
-	UINTMAX_T mask;
-	const char *infnan = NULL;
-	char iconvert[MAX_CONVERT_LENGTH];
-	char fconvert[MAX_CONVERT_LENGTH];
-	char econvert[4];	/* "e-12" (without nul-termination). */
-	char esign = 0;
-	char sign = 0;
-	int leadfraczeros = 0;
-	int exponent = 0;
-	int emitpoint = 0;
-	int omitzeros = 0;
-	int omitcount = 0;
-	int padlen = 0;
-	int epos = 0;
-	int fpos = 0;
-	int ipos = 0;
-	int separators = (flags & PRINT_F_QUOTE);
-	int estyle = (flags & PRINT_F_TYPE_E);
-#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT
-	struct lconv *lc = localeconv();
-#endif	/* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */
-
-	/*
-	 * AIX' man page says the default is 0, but C99 and at least Solaris'
-	 * and NetBSD's man pages say the default is 6, and sprintf(3) on AIX
-	 * defaults to 6.
-	 */
-	if (precision == -1)
-		precision = 6;
-
-	if (fvalue < 0.0)
-		sign = '-';
-	else if (flags & PRINT_F_PLUS)	/* Do a sign. */
-		sign = '+';
-	else if (flags & PRINT_F_SPACE)
-		sign = ' ';
-
-	if (ISNAN(fvalue))
-		infnan = (flags & PRINT_F_UP) ? "NAN" : "nan";
-	else if (ISINF(fvalue))
-		infnan = (flags & PRINT_F_UP) ? "INF" : "inf";
-
-	if (infnan != NULL) {
-		if (sign != 0)
-			iconvert[ipos++] = sign;
-		while (*infnan != '\0')
-			iconvert[ipos++] = *infnan++;
-		fmtstr(str, len, size, iconvert, width, ipos, flags);
-		return;
-	}
-
-	/* "%e" (or "%E") or "%g" (or "%G") conversion. */
-	if (flags & PRINT_F_TYPE_E || flags & PRINT_F_TYPE_G) {
-		if (flags & PRINT_F_TYPE_G) {
-			/*
-			 * For "%g" (and "%G") conversions, the precision
-			 * specifies the number of significant digits, which
-			 * includes the digits in the integer part.  The
-			 * conversion will or will not be using "e-style" (like
-			 * "%e" or "%E" conversions) depending on the precision
-			 * and on the exponent.  However, the exponent can be
-			 * affected by rounding the converted value, so we'll
-			 * leave this decision for later.  Until then, we'll
-			 * assume that we're going to do an "e-style" conversion
-			 * (in order to get the exponent calculated).  For
-			 * "e-style", the precision must be decremented by one.
-			 */
-			precision--;
-			/*
-			 * For "%g" (and "%G") conversions, trailing zeros are
-			 * removed from the fractional portion of the result
-			 * unless the "#" flag was specified.
-			 */
-			if (!(flags & PRINT_F_NUM))
-				omitzeros = 1;
-		}
-		exponent = getexponent(fvalue);
-		estyle = 1;
-	}
-
-again:
-	/*
-	 * Sorry, we only support 9, 19, or 38 digits (that is, the number of
-	 * digits of the 32-bit, the 64-bit, or the 128-bit UINTMAX_MAX value
-	 * minus one) past the decimal point due to our conversion method.
-	 */
-	switch (sizeof(UINTMAX_T)) {
-	case 16:
-		if (precision > 38)
-			precision = 38;
-		break;
-	case 8:
-		if (precision > 19)
-			precision = 19;
-		break;
-	default:
-		if (precision > 9)
-			precision = 9;
-		break;
-	}
-
-	ufvalue = (fvalue >= 0.0) ? fvalue : -fvalue;
-	if (estyle)	/* We want exactly one integer digit. */
-		ufvalue /= mypow10(exponent);
-
-	if ((intpart = cast(ufvalue)) == UINTMAX_MAX) {
-		*overflow = 1;
-		return;
-	}
-
-	/*
-	 * Factor of ten with the number of digits needed for the fractional
-	 * part.  For example, if the precision is 3, the mask will be 1000.
-	 */
-	mask = (UINTMAX_T)mypow10(precision);
-	/*
-	 * We "cheat" by converting the fractional part to integer by
-	 * multiplying by a factor of ten.
-	 */
-	if ((fracpart = myround(mask * (ufvalue - intpart))) >= mask) {
-		/*
-		 * For example, ufvalue = 2.99962, intpart = 2, and mask = 1000
-		 * (because precision = 3).  Now, myround(1000 * 0.99962) will
-		 * return 1000.  So, the integer part must be incremented by one
-		 * and the fractional part must be set to zero.
-		 */
-		intpart++;
-		fracpart = 0;
-		if (estyle && intpart == 10) {
-			/*
-			 * The value was rounded up to ten, but we only want one
-			 * integer digit if using "e-style".  So, the integer
-			 * part must be set to one and the exponent must be
-			 * incremented by one.
-			 */
-			intpart = 1;
-			exponent++;
-		}
-	}
-
-	/*
-	 * Now that we know the real exponent, we can check whether or not to
-	 * use "e-style" for "%g" (and "%G") conversions.  If we don't need
-	 * "e-style", the precision must be adjusted and the integer and
-	 * fractional parts must be recalculated from the original value.
-	 *
-	 * C99 says: "Let P equal the precision if nonzero, 6 if the precision
-	 * is omitted, or 1 if the precision is zero.  Then, if a conversion
-	 * with style `E' would have an exponent of X:
-	 *
-	 * - if P > X >= -4, the conversion is with style `f' (or `F') and
-	 *   precision P - (X + 1).
-	 *
-	 * - otherwise, the conversion is with style `e' (or `E') and precision
-	 *   P - 1." (7.19.6.1, 8)
-	 *
-	 * Note that we had decremented the precision by one.
-	 */
-	if (flags & PRINT_F_TYPE_G && estyle &&
-	    precision + 1 > exponent && exponent >= -4) {
-		precision -= exponent;
-		estyle = 0;
-		goto again;
-	}
-
-	if (estyle) {
-		if (exponent < 0) {
-			exponent = -exponent;
-			esign = '-';
-		} else
-			esign = '+';
-
-		/*
-		 * Convert the exponent.  The sizeof(econvert) is 4.  So, the
-		 * econvert buffer can hold e.g. "e+99" and "e-99".  We don't
-		 * support an exponent which contains more than two digits.
-		 * Therefore, the following stores are safe.
-		 */
-		epos = convert(exponent, econvert, 2, 10, 0);
-		/*
-		 * C99 says: "The exponent always contains at least two digits,
-		 * and only as many more digits as necessary to represent the
-		 * exponent." (7.19.6.1, 8)
-		 */
-		if (epos == 1)
-			econvert[epos++] = '0';
-		econvert[epos++] = esign;
-		econvert[epos++] = (flags & PRINT_F_UP) ? 'E' : 'e';
-	}
-
-	/* Convert the integer part and the fractional part. */
-	ipos = convert(intpart, iconvert, sizeof(iconvert), 10, 0);
-	if (fracpart != 0)	/* convert() would return 1 if fracpart == 0. */
-		fpos = convert(fracpart, fconvert, sizeof(fconvert), 10, 0);
-
-	leadfraczeros = precision - fpos;
-
-	if (omitzeros) {
-		if (fpos > 0)	/* Omit trailing fractional part zeros. */
-			while (omitcount < fpos && fconvert[omitcount] == '0')
-				omitcount++;
-		else {	/* The fractional part is zero, omit it completely. */
-			omitcount = precision;
-			leadfraczeros = 0;
-		}
-		precision -= omitcount;
-	}
-
-	/*
-	 * Print a decimal point if either the fractional part is non-zero
-	 * and/or the "#" flag was specified.
-	 */
-	if (precision > 0 || flags & PRINT_F_NUM)
-		emitpoint = 1;
-	if (separators)	/* Get the number of group separators we'll print. */
-		separators = getnumsep(ipos);
-
-	padlen = width                  /* Minimum field width. */
-	    - ipos                      /* Number of integer digits. */
-	    - epos                      /* Number of exponent characters. */
-	    - precision                 /* Number of fractional digits. */
-	    - separators                /* Number of group separators. */
-	    - (emitpoint ? 1 : 0)       /* Will we print a decimal point? */
-	    - ((sign != 0) ? 1 : 0);    /* Will we print a sign character? */
-
-	if (padlen < 0)
-		padlen = 0;
-
-	/*
-	 * C99 says: "If the `0' and `-' flags both appear, the `0' flag is
-	 * ignored." (7.19.6.1, 6)
-	 */
-	if (flags & PRINT_F_MINUS)	/* Left justifty. */
-		padlen = -padlen;
-	else if (flags & PRINT_F_ZERO && padlen > 0) {
-		if (sign != 0) {	/* Sign. */
-			OUTCHAR(str, *len, size, sign);
-			sign = 0;
-		}
-		while (padlen > 0) {	/* Leading zeros. */
-			OUTCHAR(str, *len, size, '0');
-			padlen--;
-		}
-	}
-	while (padlen > 0) {	/* Leading spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		padlen--;
-	}
-	if (sign != 0)	/* Sign. */
-		OUTCHAR(str, *len, size, sign);
-	while (ipos > 0) {	/* Integer part. */
-		ipos--;
-		OUTCHAR(str, *len, size, iconvert[ipos]);
-		if (separators > 0 && ipos > 0 && ipos % 3 == 0)
-			printsep(str, len, size);
-	}
-	if (emitpoint) {	/* Decimal point. */
-#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT
-		if (lc->decimal_point != NULL && *lc->decimal_point != '\0')
-			OUTCHAR(str, *len, size, *lc->decimal_point);
-		else	/* We'll always print some decimal point character. */
-#endif	/* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */
-			OUTCHAR(str, *len, size, '.');
-	}
-	while (leadfraczeros > 0) {	/* Leading fractional part zeros. */
-		OUTCHAR(str, *len, size, '0');
-		leadfraczeros--;
-	}
-	while (fpos > omitcount) {	/* The remaining fractional part. */
-		fpos--;
-		OUTCHAR(str, *len, size, fconvert[fpos]);
-	}
-	while (epos > 0) {	/* Exponent. */
-		epos--;
-		OUTCHAR(str, *len, size, econvert[epos]);
-	}
-	while (padlen < 0) {	/* Trailing spaces. */
-		OUTCHAR(str, *len, size, ' ');
-		padlen++;
-	}
-}
-
-static void
-printsep(char *str, size_t *len, size_t size)
-{
-#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP
-	struct lconv *lc = localeconv();
-	int i;
-
-	if (lc->thousands_sep != NULL)
-		for (i = 0; lc->thousands_sep[i] != '\0'; i++)
-			OUTCHAR(str, *len, size, lc->thousands_sep[i]);
-	else
-#endif	/* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */
-		OUTCHAR(str, *len, size, ',');
-}
-
-static int
-getnumsep(int digits)
-{
-	int separators = (digits - ((digits % 3 == 0) ? 1 : 0)) / 3;
-#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP
-	int strln;
-	struct lconv *lc = localeconv();
-
-	/* We support an arbitrary separator length (including zero). */
-	if (lc->thousands_sep != NULL) {
-		for (strln = 0; lc->thousands_sep[strln] != '\0'; strln++)
-			continue;
-		separators *= strln;
-	}
-#endif	/* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */
-	return separators;
-}
-
-static int
-getexponent(LDOUBLE value)
-{
-	LDOUBLE tmp = (value >= 0.0) ? value : -value;
-	int exponent = 0;
-
-	/*
-	 * We check for 99 > exponent > -99 in order to work around possible
-	 * endless loops which could happen (at least) in the second loop (at
-	 * least) if we're called with an infinite value.  However, we checked
-	 * for infinity before calling this function using our ISINF() macro, so
-	 * this might be somewhat paranoid.
-	 */
-	while (tmp < 1.0 && tmp > 0.0 && --exponent > -99)
-		tmp *= 10;
-	while (tmp >= 10.0 && ++exponent < 99)
-		tmp /= 10;
-
-	return exponent;
-}
-
-static int
-convert(UINTMAX_T value, char *buf, size_t size, int base, int caps)
-{
-	const char *digits = caps ? "0123456789ABCDEF" : "0123456789abcdef";
-	size_t pos = 0;
-
-	/* We return an unterminated buffer with the digits in reverse order. */
-	do {
-		buf[pos++] = digits[value % base];
-		value /= base;
-	} while (value != 0 && pos < size);
-
-	return (int)pos;
-}
-
-static UINTMAX_T
-cast(LDOUBLE value)
-{
-	UINTMAX_T result;
-
-	/*
-	 * We check for ">=" and not for ">" because if UINTMAX_MAX cannot be
-	 * represented exactly as an LDOUBLE value (but is less than LDBL_MAX),
-	 * it may be increased to the nearest higher representable value for the
-	 * comparison (cf. C99: 6.3.1.4, 2).  It might then equal the LDOUBLE
-	 * value although converting the latter to UINTMAX_T would overflow.
-	 */
-	if (value >= UINTMAX_MAX)
-		return UINTMAX_MAX;
-
-	result = (UINTMAX_T)value;
-	/*
-	 * At least on NetBSD/sparc64 3.0.2 and 4.99.30, casting long double to
-	 * an integer type converts e.g. 1.9 to 2 instead of 1 (which violates
-	 * the standard).  Sigh.
-	 */
-	return (result <= value) ? result : result - 1;
-}
-
-static UINTMAX_T
-myround(LDOUBLE value)
-{
-	UINTMAX_T intpart = cast(value);
-
-	return ((value -= intpart) < 0.5) ? intpart : intpart + 1;
-}
-
-static LDOUBLE
-mypow10(int exponent)
-{
-	LDOUBLE result = 1;
-
-	while (exponent > 0) {
-		result *= 10;
-		exponent--;
-	}
-	while (exponent < 0) {
-		result /= 10;
-		exponent++;
-	}
-	return result;
-}
-#endif	/* !HAVE_VSNPRINTF */
-
-#if !HAVE_VASPRINTF
-#if NEED_MYMEMCPY
-void *
-mymemcpy(void *dst, void *src, size_t len)
-{
-	const char *from = src;
-	char *to = dst;
-
-	/* No need for optimization, we use this only to replace va_copy(3). */
-	while (len-- > 0)
-		*to++ = *from++;
-	return dst;
-}
-#endif	/* NEED_MYMEMCPY */
-
-int
-util_vasprintf(char **ret, const char *format, va_list ap)
-{
-	size_t size;
-	int len;
-	va_list aq;
-
-	VA_COPY(aq, ap);
-	len = vsnprintf(NULL, 0, format, aq);
-	VA_END_COPY(aq);
-	if (len < 0 || (*ret = malloc(size = len + 1)) == NULL)
-		return -1;
-	return vsnprintf(*ret, size, format, ap);
-}
-#endif	/* !HAVE_VASPRINTF */
-
-#if !HAVE_SNPRINTF
-#if HAVE_STDARG_H
-int
-util_snprintf(char *str, size_t size, const char *format, ...)
-#else
-int
-util_snprintf(va_alist) va_dcl
-#endif	/* HAVE_STDARG_H */
-{
-#if !HAVE_STDARG_H
-	char *str;
-	size_t size;
-	char *format;
-#endif	/* HAVE_STDARG_H */
-	va_list ap;
-	int len;
-
-	VA_START(ap, format);
-	VA_SHIFT(ap, str, char *);
-	VA_SHIFT(ap, size, size_t);
-	VA_SHIFT(ap, format, const char *);
-	len = vsnprintf(str, size, format, ap);
-	va_end(ap);
-	return len;
-}
-#endif	/* !HAVE_SNPRINTF */
-
-#if !HAVE_ASPRINTF
-#if HAVE_STDARG_H
-int
-util_asprintf(char **ret, const char *format, ...)
-#else
-int
-util_asprintf(va_alist) va_dcl
-#endif	/* HAVE_STDARG_H */
-{
-#if !HAVE_STDARG_H
-	char **ret;
-	char *format;
-#endif	/* HAVE_STDARG_H */
-	va_list ap;
-	int len;
-
-	VA_START(ap, format);
-	VA_SHIFT(ap, ret, char **);
-	VA_SHIFT(ap, format, const char *);
-	len = vasprintf(ret, format, ap);
-	va_end(ap);
-	return len;
-}
-#endif	/* !HAVE_ASPRINTF */
-#else	/* Dummy declaration to avoid empty translation unit warnings. */
-int main(void);
-#endif	/* !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || [...] */
-
-
-/* vim: set joinspaces textwidth=80: */
diff --git a/src/gallium/auxiliary/util/u_split_prim.h b/src/gallium/auxiliary/util/u_split_prim.h
index 7f80fc12700..5afb7d9a920 100644
--- a/src/gallium/auxiliary/util/u_split_prim.h
+++ b/src/gallium/auxiliary/util/u_split_prim.h
@@ -23,7 +23,7 @@ struct util_split_prim {
    uint edgeflag_off:1;
 };
 
-static INLINE void
+static inline void
 util_split_prim_init(struct util_split_prim *s,
                   unsigned mode, unsigned start, unsigned count)
 {
@@ -41,7 +41,7 @@ util_split_prim_init(struct util_split_prim *s,
    s->repeat_first = 0;
 }
 
-static INLINE boolean
+static inline boolean
 util_split_prim_next(struct util_split_prim *s, unsigned max_verts)
 {
    int repeat = 0;
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index d4f51912a2d..7f8e5a1a3cf 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -51,7 +51,7 @@ union m128i {
    uint ui[4];
 };
 
-static INLINE void u_print_epi8(const char *name, __m128i r)
+static inline void u_print_epi8(const char *name, __m128i r)
 {
    union { __m128i m; ubyte ub[16]; } u;
    u.m = r;
@@ -80,7 +80,7 @@ static INLINE void u_print_epi8(const char *name, __m128i r)
                 u.ub[12], u.ub[13], u.ub[14], u.ub[15]);
 }
 
-static INLINE void u_print_epi16(const char *name, __m128i r)
+static inline void u_print_epi16(const char *name, __m128i r)
 {
    union { __m128i m; ushort us[8]; } u;
    u.m = r;
@@ -99,7 +99,7 @@ static INLINE void u_print_epi16(const char *name, __m128i r)
                 u.us[4],  u.us[5],  u.us[6],  u.us[7]);
 }
 
-static INLINE void u_print_epi32(const char *name, __m128i r)
+static inline void u_print_epi32(const char *name, __m128i r)
 {
    union { __m128i m; uint ui[4]; } u;
    u.m = r;
@@ -113,7 +113,7 @@ static INLINE void u_print_epi32(const char *name, __m128i r)
                 u.ui[0],  u.ui[1],  u.ui[2],  u.ui[3]);
 }
 
-static INLINE void u_print_ps(const char *name, __m128 r)
+static inline void u_print_ps(const char *name, __m128 r)
 {
    union { __m128 m; float f[4]; } u;
    u.m = r;
@@ -179,7 +179,7 @@ _mm_shuffle_epi8(__m128i a, __m128i mask)
  * _mm_mullo_epi32() intrinsic as to not justify adding an sse4
  * dependency at this point.
  */
-static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
+static inline __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
 {
    __m128i a4   = _mm_srli_epi64(a, 32);  /* shift by one dword */
    __m128i b4   = _mm_srli_epi64(b, 32);  /* shift by one dword */
@@ -204,7 +204,7 @@ static INLINE __m128i mm_mullo_epi32(const __m128i a, const __m128i b)
 }
 
 
-static INLINE void
+static inline void
 transpose4_epi32(const __m128i * restrict a,
                  const __m128i * restrict b,
                  const __m128i * restrict c,
diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h
index dc89c4400bc..f7ab09c8f1c 100644
--- a/src/gallium/auxiliary/util/u_string.h
+++ b/src/gallium/auxiliary/util/u_string.h
@@ -35,13 +35,14 @@
 #ifndef U_STRING_H_
 #define U_STRING_H_
 
-#if !defined(_MSC_VER) && !defined(XF86_LIBC_H)
+#if !defined(XF86_LIBC_H)
 #include <stdio.h>
 #endif
 #include <stddef.h>
 #include <stdarg.h>
 
 #include "pipe/p_compiler.h"
+#include "util/macros.h" // PRINTFLIKE
 
 
 #ifdef __cplusplus
@@ -54,7 +55,7 @@ extern "C" {
 
 #else
 
-static INLINE char *
+static inline char *
 util_strchrnul(const char *s, char c)
 {
    for (; *s && *s != c; ++s);
@@ -64,18 +65,44 @@ util_strchrnul(const char *s, char c)
 
 #endif
 
-#ifdef _MSC_VER
+#ifdef _WIN32
 
-int util_vsnprintf(char *, size_t, const char *, va_list);
-int util_snprintf(char *str, size_t size, const char *format, ...);
+static inline int
+util_vsnprintf(char *str, size_t size, const char *format, va_list ap)
+{
+   /* _vsnprintf returns -1 when the output does not fit in 'size', so the
+    * full length is then obtained from _vscprintf using a copy of 'ap'.
+    */
+   va_list ap_copy;
+   int ret;
+   va_copy(ap_copy, ap);
+   ret = _vsnprintf(str, size, format, ap);
+   if (ret < 0)
+      ret = _vscprintf(format, ap_copy);
+   va_end(ap_copy); /* every va_copy must be paired with a va_end */
+   return ret;
+}
+
+static inline int
+   PRINTFLIKE(3, 4)
+util_snprintf(char *str, size_t size, const char *format, ...)
+{
+   va_list args; /* variadic arguments following 'format' */
+   int length;   /* result reported by util_vsnprintf() */
+   va_start(args, format);
+   length = util_vsnprintf(str, size, format, args);
+   va_end(args);
+   return length;
+}
 
-static INLINE void
+static inline void
 util_vsprintf(char *str, const char *format, va_list ap)
 {
    util_vsnprintf(str, (size_t)-1, format, ap);
 }
 
-static INLINE void
+static inline void
+   PRINTFLIKE(2, 3)
 util_sprintf(char *str, const char *format, ...)
 {
    va_list ap;
@@ -84,7 +111,7 @@ util_sprintf(char *str, const char *format, ...)
    va_end(ap);
 }
 
-static INLINE char *
+static inline char *
 util_strchr(const char *s, char c)
 {
    char *p = util_strchrnul(s, c);
@@ -92,7 +119,7 @@ util_strchr(const char *s, char c)
    return *p ? p : NULL;
 }
 
-static INLINE char*
+static inline char*
 util_strncat(char *dst, const char *src, size_t n)
 {
    char *p = dst + strlen(dst);
@@ -106,7 +133,7 @@ util_strncat(char *dst, const char *src, size_t n)
    return dst;
 }
 
-static INLINE int
+static inline int
 util_strcmp(const char *s1, const char *s2)
 {
    unsigned char u1, u2;
@@ -122,7 +149,7 @@ util_strcmp(const char *s1, const char *s2)
    return 0;
 }
 
-static INLINE int
+static inline int
 util_strncmp(const char *s1, const char *s2, size_t n)
 {
    unsigned char u1, u2;
@@ -138,7 +165,7 @@ util_strncmp(const char *s1, const char *s2, size_t n)
    return 0;
 }
 
-static INLINE char *
+static inline char *
 util_strstr(const char *haystack, const char *needle)
 {
    const char *p = haystack;
@@ -152,7 +179,7 @@ util_strstr(const char *haystack, const char *needle)
    return NULL;
 }
 
-static INLINE void *
+static inline void *
 util_memmove(void *dest, const void *src, size_t n)
 {
    char *p = (char *)dest;
@@ -199,7 +226,7 @@ struct util_strbuf
 };
 
 
-static INLINE void
+static inline void
 util_strbuf_init(struct util_strbuf *sbuf, char *str, size_t size) 
 {
    sbuf->str = str;
@@ -209,7 +236,7 @@ util_strbuf_init(struct util_strbuf *sbuf, char *str, size_t size)
 }
 
 
-static INLINE void
+static inline void
 util_strbuf_printf(struct util_strbuf *sbuf, const char *format, ...)
 {
    if(sbuf->left > 1) {
diff --git a/src/gallium/auxiliary/util/u_surfaces.h b/src/gallium/auxiliary/util/u_surfaces.h
index 1605215cb88..b84694c540b 100644
--- a/src/gallium/auxiliary/util/u_surfaces.h
+++ b/src/gallium/auxiliary/util/u_surfaces.h
@@ -50,7 +50,7 @@ util_surfaces_do_get(struct util_surfaces *us, unsigned surface_struct_size,
                      struct pipe_surface **res);
 
 /* fast inline path for the very common case */
-static INLINE boolean
+static inline boolean
 util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size,
                   struct pipe_context *ctx, struct pipe_resource *pt,
                   unsigned level, unsigned layer,
@@ -70,7 +70,7 @@ util_surfaces_get(struct util_surfaces *us, unsigned surface_struct_size,
    return util_surfaces_do_get(us, surface_struct_size, ctx, pt, level, layer, res);
 }
 
-static INLINE struct pipe_surface *
+static inline struct pipe_surface *
 util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned level, unsigned layer)
 {
    if(!us->u.pv)
@@ -84,7 +84,7 @@ util_surfaces_peek(struct util_surfaces *us, struct pipe_resource *pt, unsigned
 
 void util_surfaces_do_detach(struct util_surfaces *us, struct pipe_surface *ps);
 
-static INLINE void
+static inline void
 util_surfaces_detach(struct util_surfaces *us, struct pipe_surface *ps)
 {
    if(likely(ps->texture->target == PIPE_TEXTURE_2D || ps->texture->target == PIPE_TEXTURE_RECT))
diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h
index a33d7f7722b..dc1f568a8e5 100644
--- a/src/gallium/auxiliary/util/u_tile.h
+++ b/src/gallium/auxiliary/util/u_tile.h
@@ -42,7 +42,7 @@ struct pipe_transfer;
  *
  * \return TRUE if tile is totally clipped, FALSE otherwise
  */
-static INLINE boolean
+static inline boolean
 u_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_box *box)
 {
    if ((int) x >= box->width)
diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h
index 2bee1e00014..a5017d6bce2 100644
--- a/src/gallium/auxiliary/util/u_time.h
+++ b/src/gallium/auxiliary/util/u_time.h
@@ -60,7 +60,7 @@ struct util_time
    
 
 PIPE_DEPRECATED
-static INLINE void
+static inline void
 util_time_get(struct util_time *t)
 {
    t->counter = os_time_get();
@@ -71,7 +71,7 @@ util_time_get(struct util_time *t)
  * Return t2 = t1 + usecs
  */
 PIPE_DEPRECATED
-static INLINE void
+static inline void
 util_time_add(const struct util_time *t1,
               int64_t usecs,
               struct util_time *t2)
@@ -84,7 +84,7 @@ util_time_add(const struct util_time *t1,
  * Return difference between times, in microseconds
  */
 PIPE_DEPRECATED
-static INLINE int64_t
+static inline int64_t
 util_time_diff(const struct util_time *t1, 
                const struct util_time *t2)
 {
@@ -98,7 +98,7 @@ util_time_diff(const struct util_time *t1,
  * Not publicly available because it does not take in account wrap-arounds.
  * Use util_time_timeout instead.
  */
-static INLINE int
+static inline int
 _util_time_compare(const struct util_time *t1,
                    const struct util_time *t2)
 {
@@ -115,7 +115,7 @@ _util_time_compare(const struct util_time *t1,
  * Returns non-zero when the timeout expires.
  */
 PIPE_DEPRECATED
-static INLINE boolean
+static inline boolean
 util_time_timeout(const struct util_time *start, 
                   const struct util_time *end,
                   const struct util_time *curr)
@@ -128,7 +128,7 @@ util_time_timeout(const struct util_time *start,
  * Return current time in microseconds
  */
 PIPE_DEPRECATED
-static INLINE int64_t
+static inline int64_t
 util_time_micros(void)
 {
    return os_time_get();
@@ -136,7 +136,7 @@ util_time_micros(void)
 
 
 PIPE_DEPRECATED
-static INLINE void
+static inline void
 util_time_sleep(int64_t usecs)
 {
    os_time_sleep(usecs);
diff --git a/src/gallium/auxiliary/util/u_transfer.c b/src/gallium/auxiliary/util/u_transfer.c
index 71da35d6d39..4cb524d5cb1 100644
--- a/src/gallium/auxiliary/util/u_transfer.c
+++ b/src/gallium/auxiliary/util/u_transfer.c
@@ -90,7 +90,7 @@ void u_default_transfer_unmap( struct pipe_context *pipe,
 }
 
 
-static INLINE struct u_resource *
+static inline struct u_resource *
 u_resource( struct pipe_resource *res )
 {
    return (struct u_resource *)res;
diff --git a/src/gallium/auxiliary/util/u_video.h b/src/gallium/auxiliary/util/u_video.h
index b4743d13fbf..ddc00216105 100644
--- a/src/gallium/auxiliary/util/u_video.h
+++ b/src/gallium/auxiliary/util/u_video.h
@@ -40,7 +40,7 @@ extern "C" {
 #include "util/u_debug.h"
 #include "util/u_math.h"
 
-static INLINE enum pipe_video_format
+static inline enum pipe_video_format
 u_reduce_video_profile(enum pipe_video_profile profile)
 {
    switch (profile)
@@ -68,12 +68,19 @@ u_reduce_video_profile(enum pipe_video_profile profile)
       case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH444:
          return PIPE_VIDEO_FORMAT_MPEG4_AVC;
 
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN:
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_STILL:
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_12:
+      case PIPE_VIDEO_PROFILE_HEVC_MAIN_444:
+         return PIPE_VIDEO_FORMAT_HEVC;
+
       default:
          return PIPE_VIDEO_FORMAT_UNKNOWN;
    }
 }
 
-static INLINE void
+static inline void
 u_copy_nv12_to_yv12(void *const *destination_data,
                     uint32_t const *destination_pitches,
                     int src_plane, int src_field,
@@ -99,7 +106,7 @@ u_copy_nv12_to_yv12(void *const *destination_data,
    }
 }
 
-static INLINE void
+static inline void
 u_copy_yv12_to_nv12(void *const *destination_data,
                     uint32_t const *destination_pitches,
                     int src_plane, int src_field,
@@ -122,7 +129,7 @@ u_copy_yv12_to_nv12(void *const *destination_data,
    }
 }
 
-static INLINE void
+static inline void
 u_copy_swap422_packed(void *const *destination_data,
                        uint32_t const *destination_pitches,
                        int src_plane, int src_field,
@@ -147,7 +154,7 @@ u_copy_swap422_packed(void *const *destination_data,
    }
 }
 
-static INLINE uint32_t
+static inline uint32_t
 u_get_h264_level(uint32_t width, uint32_t height, uint32_t *max_reference)
 {
    uint32_t max_dpb_mbs;
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 69839e61386..afe53063b48 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -538,7 +538,7 @@ cleanup_buffers(struct vl_compositor *c)
    pipe_resource_reference(&c->vertex_buf.buffer, NULL);
 }
 
-static INLINE struct u_rect
+static inline struct u_rect
 default_rect(struct vl_compositor_layer *layer)
 {
    struct pipe_resource *res = layer->sampler_views[0]->texture;
@@ -546,21 +546,21 @@ default_rect(struct vl_compositor_layer *layer)
    return rect;
 }
 
-static INLINE struct vertex2f
+static inline struct vertex2f
 calc_topleft(struct vertex2f size, struct u_rect rect)
 {
    struct vertex2f res = { rect.x0 / size.x, rect.y0 / size.y };
    return res;
 }
 
-static INLINE struct vertex2f
+static inline struct vertex2f
 calc_bottomright(struct vertex2f size, struct u_rect rect)
 {
    struct vertex2f res = { rect.x1 / size.x, rect.y1 / size.y };
    return res;
 }
 
-static INLINE void
+static inline void
 calc_src_and_dst(struct vl_compositor_layer *layer, unsigned width, unsigned height,
                  struct u_rect src, struct u_rect dst)
 {
@@ -658,7 +658,7 @@ gen_rect_verts(struct vertex2f *vb, struct vl_compositor_layer *layer)
    vb[19].y = layer->colors[3].w;
 }
 
-static INLINE struct u_rect
+static inline struct u_rect
 calc_drawn_area(struct vl_compositor_state *s, struct vl_compositor_layer *layer)
 {
    struct vertex2f tl, br;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index abb3780f61e..52ce6c416aa 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -533,7 +533,7 @@ static struct dct_coeff tbl_B14_DC[1 << 17];
 static struct dct_coeff tbl_B14_AC[1 << 17];
 static struct dct_coeff tbl_B15[1 << 17];
 
-static INLINE void
+static inline void
 init_dct_coeff_table(struct dct_coeff *dst, const struct dct_coeff_compressed *src,
                      unsigned size, bool is_DC)
 {
@@ -594,7 +594,7 @@ init_dct_coeff_table(struct dct_coeff *dst, const struct dct_coeff_compressed *s
    }
 }
 
-static INLINE void
+static inline void
 init_tables()
 {
    vl_vlc_init_table(tbl_B1, Elements(tbl_B1), macroblock_address_increment, Elements(macroblock_address_increment));
@@ -611,19 +611,19 @@ init_tables()
    init_dct_coeff_table(tbl_B15, dct_coeff_tbl_one, Elements(dct_coeff_tbl_one), false);
 }
 
-static INLINE int
+static inline int
 DIV2DOWN(int todiv)
 {
    return (todiv&~1)/2;
 }
 
-static INLINE int
+static inline int
 DIV2UP(int todiv)
 {
    return (todiv+1)/2;
 }
 
-static INLINE void
+static inline void
 motion_vector(struct vl_mpg12_bs *bs, int r, int s, int dmv, short delta[2], short dmvector[2])
 {
    int t;
@@ -647,7 +647,7 @@ motion_vector(struct vl_mpg12_bs *bs, int r, int s, int dmv, short delta[2], sho
    }
 }
 
-static INLINE int
+static inline int
 wrap(short f, int shift)
 {
    if (f < (-16 << shift))
@@ -658,7 +658,7 @@ wrap(short f, int shift)
       return f;
 }
 
-static INLINE void
+static inline void
 motion_vector_frame(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb)
 {
    int dmv = mb->macroblock_modes.bits.frame_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME;
@@ -682,7 +682,7 @@ motion_vector_frame(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock
    }
 }
 
-static INLINE void
+static inline void
 motion_vector_field(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock *mb)
 {
    int dmv = mb->macroblock_modes.bits.field_motion_type == PIPE_MPEG12_MO_TYPE_DUAL_PRIME;
@@ -701,12 +701,12 @@ motion_vector_field(struct vl_mpg12_bs *bs, int s, struct pipe_mpeg12_macroblock
    }
 }
 
-static INLINE void
+static inline void
 reset_predictor(struct vl_mpg12_bs *bs) {
    bs->pred_dc[0] = bs->pred_dc[1] = bs->pred_dc[2] = 0;
 }
 
-static INLINE void
+static inline void
 decode_dct(struct vl_mpg12_bs *bs, struct pipe_mpeg12_macroblock *mb, int scale)
 {
    static const unsigned blk2cc[] = { 0, 0, 0, 0, 1, 2 };
@@ -805,7 +805,7 @@ entry:
       vl_vlc_eatbits(&bs->vlc, 1);
 }
 
-static INLINE void
+static inline void
 decode_slice(struct vl_mpg12_bs *bs, struct pipe_video_buffer *target)
 {
    struct pipe_mpeg12_macroblock mb;
@@ -929,6 +929,7 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_video_buffer *target)
          mb.PMV[1][0][0] = mb.PMV[0][0][0];
          mb.PMV[1][0][1] = mb.PMV[0][0][1];
          assert(extra);
+         (void) extra;
       } else if (mb.macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ||
                 !(mb.macroblock_type & (PIPE_MPEG12_MB_TYPE_MOTION_FORWARD |
                                         PIPE_MPEG12_MB_TYPE_MOTION_BACKWARD))) {
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 8579460e070..b7009837293 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -84,6 +84,9 @@ static const unsigned const_empty_block_mask_420[3][2][2] = {
 
 struct video_buffer_private
 {
+   struct list_head list;
+   struct pipe_video_buffer *video_buffer;
+
    struct pipe_sampler_view *sampler_view_planes[VL_NUM_COMPONENTS];
    struct pipe_surface      *surfaces[VL_MAX_SURFACES];
 
@@ -99,6 +102,8 @@ destroy_video_buffer_private(void *private)
    struct video_buffer_private *priv = private;
    unsigned i;
 
+   list_del(&priv->list);
+
    for (i = 0; i < VL_NUM_COMPONENTS; ++i)
       pipe_sampler_view_reference(&priv->sampler_view_planes[i], NULL);
 
@@ -126,6 +131,9 @@ get_video_buffer_private(struct vl_mpeg12_decoder *dec, struct pipe_video_buffer
 
    priv = CALLOC_STRUCT(video_buffer_private);
 
+   list_add(&priv->list, &dec->buffer_privates);
+   priv->video_buffer = buf;
+
    sv = buf->get_sampler_view_planes(buf);
    for (i = 0; i < VL_NUM_COMPONENTS; ++i)
       if (sv[i])
@@ -141,6 +149,18 @@ get_video_buffer_private(struct vl_mpeg12_decoder *dec, struct pipe_video_buffer
    return priv;
 }
 
+static void
+free_video_buffer_privates(struct vl_mpeg12_decoder *dec)
+{
+   struct video_buffer_private *entry, *tmp;
+
+   /* Reset this decoder's associated data on every tracked video buffer. */
+   LIST_FOR_EACH_ENTRY_SAFE(entry, tmp, &dec->buffer_privates, list) {
+      vl_video_buffer_set_associated_data(entry->video_buffer, &dec->base,
+                                          NULL, NULL);
+   }
+}
+
 static bool
 init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer)
 {
@@ -297,7 +317,7 @@ cleanup_mc_buffer(struct vl_mpeg12_buffer *buf)
       vl_mc_cleanup_buffer(&buf->mc[i]);
 }
 
-static INLINE void
+static inline void
 MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned weights[2])
 {
    assert(mb);
@@ -332,7 +352,7 @@ MacroBlockTypeToPipeWeights(const struct pipe_mpeg12_macroblock *mb, unsigned we
    }
 }
 
-static INLINE struct vl_motionvector
+static inline struct vl_motionvector
 MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector,
                    unsigned field_select_mask, unsigned weight)
 {
@@ -383,7 +403,7 @@ MotionVectorToPipe(const struct pipe_mpeg12_macroblock *mb, unsigned vector,
    return mv;
 }
 
-static INLINE void
+static inline void
 UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec,
                   struct vl_mpeg12_buffer *buf,
                   const struct pipe_mpeg12_macroblock *mb)
@@ -464,6 +484,8 @@ vl_mpeg12_destroy(struct pipe_video_codec *decoder)
 
    assert(decoder);
 
+   free_video_buffer_privates(dec);
+
    /* Asserted in softpipe_delete_fs_state() for some reason */
    dec->context->bind_vs_state(dec->context, NULL);
    dec->context->bind_fs_state(dec->context, NULL);
@@ -1187,6 +1209,8 @@ vl_create_mpeg12_decoder(struct pipe_context *context,
    if (!init_pipe_state(dec))
       goto error_pipe_state;
 
+   list_inithead(&dec->buffer_privates);
+
    return &dec->base;
 
 error_pipe_state:
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
index 2a604054387..505dd675f66 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h
@@ -30,6 +30,8 @@
 
 #include "pipe/p_video_codec.h"
 
+#include "util/list.h"
+
 #include "vl_mpeg12_bitstream.h"
 #include "vl_zscan.h"
 #include "vl_idct.h"
@@ -77,6 +79,8 @@ struct vl_mpeg12_decoder
 
    unsigned current_buffer;
    struct vl_mpeg12_buffer *dec_buffers[4];
+
+   struct list_head buffer_privates;
 };
 
 struct vl_mpeg12_buffer
diff --git a/src/gallium/auxiliary/vl/vl_rbsp.h b/src/gallium/auxiliary/vl/vl_rbsp.h
index 2e3da8e1d28..7867238c49e 100644
--- a/src/gallium/auxiliary/vl/vl_rbsp.h
+++ b/src/gallium/auxiliary/vl/vl_rbsp.h
@@ -48,7 +48,7 @@ struct vl_rbsp {
 /**
  * Initialize the RBSP object
  */
-static INLINE void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, unsigned num_bits)
+static inline void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, unsigned num_bits)
 {
    unsigned bits_left = vl_vlc_bits_left(nal);
 
@@ -71,7 +71,7 @@ static INLINE void vl_rbsp_init(struct vl_rbsp *rbsp, struct vl_vlc *nal, unsign
 /**
  * Make at least 16 more bits available
  */
-static INLINE void vl_rbsp_fillbits(struct vl_rbsp *rbsp)
+static inline void vl_rbsp_fillbits(struct vl_rbsp *rbsp)
 {
    unsigned valid = vl_vlc_valid_bits(&rbsp->nal);
    unsigned i, bits;
@@ -108,7 +108,7 @@ static INLINE void vl_rbsp_fillbits(struct vl_rbsp *rbsp)
 /**
  * Return an unsigned integer from the first n bits
  */
-static INLINE unsigned vl_rbsp_u(struct vl_rbsp *rbsp, unsigned n)
+static inline unsigned vl_rbsp_u(struct vl_rbsp *rbsp, unsigned n)
 {
    if (n == 0)
       return 0;
@@ -120,7 +120,7 @@ static INLINE unsigned vl_rbsp_u(struct vl_rbsp *rbsp, unsigned n)
 /**
  * Return an unsigned exponential Golomb encoded integer
  */
-static INLINE unsigned vl_rbsp_ue(struct vl_rbsp *rbsp)
+static inline unsigned vl_rbsp_ue(struct vl_rbsp *rbsp)
 {
    unsigned bits = 0;
 
@@ -134,7 +134,7 @@ static INLINE unsigned vl_rbsp_ue(struct vl_rbsp *rbsp)
 /**
  * Return an signed exponential Golomb encoded integer
  */
-static INLINE signed vl_rbsp_se(struct vl_rbsp *rbsp)
+static inline signed vl_rbsp_se(struct vl_rbsp *rbsp)
 {
    signed codeNum = vl_rbsp_ue(rbsp);
    if (codeNum & 1)
@@ -146,7 +146,7 @@ static INLINE signed vl_rbsp_se(struct vl_rbsp *rbsp)
 /**
  * Are more data available in the RBSP ?
  */
-static INLINE bool vl_rbsp_more_data(struct vl_rbsp *rbsp)
+static inline bool vl_rbsp_more_data(struct vl_rbsp *rbsp)
 {
    unsigned bits, value;
 
diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h
index 2f905956dbf..7821b8be0a1 100644
--- a/src/gallium/auxiliary/vl/vl_vlc.h
+++ b/src/gallium/auxiliary/vl/vl_vlc.h
@@ -65,7 +65,7 @@ struct vl_vlc_compressed
 /**
  * initalize and decompress a lookup table
  */
-static INLINE void
+static inline void
 vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_vlc_compressed *src, unsigned src_size)
 {
    unsigned i, bits = util_logbase2(dst_size);
@@ -87,7 +87,7 @@ vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_v
 /**
  * switch over to next input buffer
  */
-static INLINE void
+static inline void
 vl_vlc_next_input(struct vl_vlc *vlc)
 {
    unsigned len = vlc->sizes[0];
@@ -112,7 +112,7 @@ vl_vlc_next_input(struct vl_vlc *vlc)
 /**
  * align the data pointer to the next dword
  */
-static INLINE void
+static inline void
 vl_vlc_align_data_ptr(struct vl_vlc *vlc)
 {
    /* align the data pointer */
@@ -126,7 +126,7 @@ vl_vlc_align_data_ptr(struct vl_vlc *vlc)
 /**
  * fill the bit buffer, so that at least 32 bits are valid
  */
-static INLINE void
+static inline void
 vl_vlc_fillbits(struct vl_vlc *vlc)
 {
    assert(vlc);
@@ -175,7 +175,7 @@ vl_vlc_fillbits(struct vl_vlc *vlc)
 /**
  * initialize vlc structure and start reading from first input buffer
  */
-static INLINE void
+static inline void
 vl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs,
             const void *const *inputs, const unsigned *sizes)
 {
@@ -203,7 +203,7 @@ vl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs,
 /**
  * number of bits still valid in bit buffer
  */
-static INLINE unsigned
+static inline unsigned
 vl_vlc_valid_bits(struct vl_vlc *vlc)
 {
    return 32 - vlc->invalid_bits;
@@ -212,7 +212,7 @@ vl_vlc_valid_bits(struct vl_vlc *vlc)
 /**
  * number of bits left over all inbut buffers
  */
-static INLINE unsigned
+static inline unsigned
 vl_vlc_bits_left(struct vl_vlc *vlc)
 {
    signed bytes_left = vlc->end - vlc->data;
@@ -223,7 +223,7 @@ vl_vlc_bits_left(struct vl_vlc *vlc)
 /**
  * get num_bits from bit buffer without removing them
  */
-static INLINE unsigned
+static inline unsigned
 vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits)
 {
    assert(vl_vlc_valid_bits(vlc) >= num_bits || vlc->data >= vlc->end);
@@ -233,7 +233,7 @@ vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits)
 /**
  * remove num_bits from bit buffer
  */
-static INLINE void
+static inline void
 vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits)
 {
    assert(vl_vlc_valid_bits(vlc) >= num_bits);
@@ -245,7 +245,7 @@ vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits)
 /**
  * get num_bits from bit buffer with removing them
  */
-static INLINE unsigned
+static inline unsigned
 vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
 {
    unsigned value;
@@ -261,7 +261,7 @@ vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
 /**
  * treat num_bits as signed value and remove them from bit buffer
  */
-static INLINE signed
+static inline signed
 vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
 {
    signed value;
@@ -277,7 +277,7 @@ vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
 /**
  * lookup a value and length in a decompressed table
  */
-static INLINE int8_t
+static inline int8_t
 vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned num_bits)
 {
    tbl += vl_vlc_peekbits(vlc, num_bits);
@@ -288,7 +288,7 @@ vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned n
 /**
  * fast forward search for a specific byte value
  */
-static INLINE boolean
+static inline boolean
 vl_vlc_search_byte(struct vl_vlc *vlc, unsigned num_bits, uint8_t value)
 {
    /* make sure we are on a byte boundary */
@@ -345,7 +345,7 @@ vl_vlc_search_byte(struct vl_vlc *vlc, unsigned num_bits, uint8_t value)
 /**
  * remove num_bits bits starting at pos from the bitbuffer
  */
-static INLINE void
+static inline void
 vl_vlc_removebits(struct vl_vlc *vlc, unsigned pos, unsigned num_bits)
 {
    uint64_t lo = (vlc->buffer & (~0UL >> (pos + num_bits))) << num_bits;
@@ -357,7 +357,7 @@ vl_vlc_removebits(struct vl_vlc *vlc, unsigned pos, unsigned num_bits)
 /**
  * limit the number of bits left for fetching
  */
-static INLINE void
+static inline void
 vl_vlc_limit(struct vl_vlc *vlc, unsigned bits_left)
 {
    assert(bits_left <= vl_vlc_bits_left(vlc));
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index 7e61b88e6b5..3b1b87f9523 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -37,6 +37,8 @@
 #include <xf86drm.h>
 #include <errno.h>
 
+#include "loader.h"
+
 #include "pipe/p_screen.h"
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
@@ -293,6 +295,16 @@ vl_screen_get_private(struct vl_screen *vscreen)
    return vscreen;
 }
 
+static xcb_screen_t *
+get_xcb_screen(xcb_screen_iterator_t iter, int screen)
+{
+    /* Skip 'screen' roots; yields NULL if the iterator runs out first. */
+    for (; iter.rem && screen != 0; --screen)
+        xcb_screen_next(&iter);
+
+    return iter.rem ? iter.data : NULL;
+}
+
 struct vl_screen*
 vl_screen_create(Display *display, int screen)
 {
@@ -334,8 +346,7 @@ vl_screen_create(Display *display, int screen)
       goto free_query;
 
    s = xcb_setup_roots_iterator(xcb_get_setup(scrn->conn));
-   while (screen--)
-	xcb_screen_next(&s);
+
    driverType = XCB_DRI2_DRIVER_TYPE_DRI;
 #ifdef DRI2DriverPrimeShift
    {
@@ -351,7 +362,7 @@ vl_screen_create(Display *display, int screen)
    }
 #endif
 
-   connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, s.data->root, driverType);
+   connect_cookie = xcb_dri2_connect_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, driverType);
    connect = xcb_dri2_connect_reply(scrn->conn, connect_cookie, NULL);
    if (connect == NULL || connect->driver_name_length + connect->device_name_length == 0)
       goto free_connect;
@@ -361,7 +372,7 @@ vl_screen_create(Display *display, int screen)
    if (!device_name)
       goto free_connect;
    memcpy(device_name, xcb_dri2_connect_device_name(connect), device_name_length);
-   fd = open(device_name, O_RDWR);
+   fd = loader_open_device(device_name);
    free(device_name);
 
    if (fd < 0)
@@ -370,7 +381,7 @@ vl_screen_create(Display *display, int screen)
    if (drmGetMagic(fd, &magic))
       goto free_connect;
 
-   authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, s.data->root, magic);
+   authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn, get_xcb_screen(s, screen)->root, magic);
    authenticate = xcb_dri2_authenticate_reply(scrn->conn, authenticate_cookie, NULL);
 
    if (authenticate == NULL || !authenticate->authenticated)
@@ -379,7 +390,7 @@ vl_screen_create(Display *display, int screen)
 #if GALLIUM_STATIC_TARGETS
    scrn->base.pscreen = dd_create_screen(fd);
 #else
-   if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd, false))
+   if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd))
       scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev, PIPE_SEARCH_DIR);
 #endif // GALLIUM_STATIC_TARGETS
author	Jason Ekstrand <[email protected]>	2015-08-14 17:25:04 -0700
committer	Jason Ekstrand <[email protected]>	2015-08-17 11:25:03 -0700
commit	6a7ca4ef2cd3f39d3b5e77051cb3f3175e9e60df (patch)
tree	d5413781ac9e9ecfc22cf403fa7465d6a7cadb34 /src/gallium/auxiliary
parent	b4c02253c4e1a7bc5a7a6369045210932f5de605 (diff)
parent	d3e23f1ff915c01541f8df375b50b93b3da565a8 (diff)